1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-29 20:17:45 +00:00

Add cut_characters implementation, based on cut_bytes

This implementation uses Rust's concept of characters and fails if the
input isn't valid UTF-8. GNU cut implements '--characters' as an alias
for '--bytes', so its semantics for this option differ from those of
this implementation.
This commit is contained in:
polyphemus 2014-06-12 23:04:01 +02:00
parent 2ab586459b
commit 8b1ff08bd5
3 changed files with 196 additions and 130 deletions

View file

@ -13,6 +13,7 @@ PROGS := \
cksum \
comm \
cp \
cut \
dirname \
echo \
env \

View file

@ -1,11 +1,21 @@
#![crate_id(name="cut", vers="1.0.0", author="Rolf Morel")]
/*
* This file is part of the uutils coreutils package.
*
* (c) Rolf Morel <rolfmorel@gmail.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
#![feature(macro_rules)]
extern crate getopts;
extern crate libc;
use std::os;
use std::io::{print,stdin,stdout,File,BufferedWriter,BufferedReader};
use std::io::{print,File,BufferedWriter,BufferedReader,stdin};
use getopts::{optopt, optflag, getopts, usage};
use ranges::Range;
@ -52,17 +62,13 @@ fn list_to_ranges(list: &str, complement: bool) -> Result<Vec<Range>, String> {
Ok(range_vec)
}
fn cut_bytes(files: Vec<String>, ranges: Vec<Range>, opts: Options) -> int {
fn cut_bytes<T: Reader>(mut reader: BufferedReader<T>,
ranges: &Vec<Range>,
opts: &Options) -> int {
let mut out = BufferedWriter::new(std::io::stdio::stdout_raw());
let (use_delim, out_delim) = match opts.out_delim {
let (use_delim, out_delim) = match opts.out_delim.clone() {
Some(delim) => (true, delim),
None => (false, "".to_string())
};
for filename in files.move_iter() {
let mut file = match open(&filename) {
Some(file) => file,
None => continue
None => (false, "".to_str())
};
let mut byte_pos = 0;
@ -70,19 +76,21 @@ fn cut_bytes(files: Vec<String>, ranges: Vec<Range>, opts: Options) -> int {
let mut range_pos = 0;
loop {
let byte = match file.read_u8() {
Ok(byte) => byte,
let mut byte = [0u8];
match reader.read(byte) {
Ok(1) => (),
Err(std::io::IoError{ kind: std::io::EndOfFile, ..}) => {
if byte_pos > 0 {
out.write_u8('\n' as u8);
out.write_u8('\n' as u8).unwrap();
}
break
}
_ => fail!(),
};
}
let byte = byte[0];
if byte == ('\n' as u8) {
out.write_u8('\n' as u8);
out.write_u8('\n' as u8).unwrap();
byte_pos = 0;
print_delim = false;
range_pos = 0;
@ -98,33 +106,142 @@ fn cut_bytes(files: Vec<String>, ranges: Vec<Range>, opts: Options) -> int {
if byte_pos >= cur_range.low {
if use_delim {
if print_delim && byte_pos == cur_range.low {
out.write_str(out_delim.as_slice());
out.write_str(out_delim.as_slice()).unwrap();
}
print_delim = true;
}
out.write_u8(byte);
}
out.write_u8(byte).unwrap();
}
}
}
return 0;
0
}
fn cut_charachters(files: Vec<String>, ranges: Vec<Range>,
opts: Options) -> int {
return 0;
/// Copies the selected character positions of every line read from `reader`
/// to raw stdout (through a `BufferedWriter`).
///
/// * `reader` - input source, consumed one character at a time via `read_char`.
/// * `ranges` - the `low`/`high` position ranges to keep. Positions are
///   1-based: `char_pos` is incremented before it is compared to a range.
/// * `opts`   - if `opts.out_delim` is `Some`, that string is written before
///   the first character of each selected range except the first one emitted
///   on a line.
///
/// Always returns 0. The function fails (panics) on input reported as
/// `InvalidInput` ("Invalid utf8"), on any other read error, and on any
/// write error (all writes are `unwrap`ped).
///
/// NOTE(review): the range walk below presumably relies on `ranges` being
/// sorted, non-overlapping and non-empty - confirm against `list_to_ranges`.
fn cut_characters<T: Reader>(mut reader: BufferedReader<T>,
ranges: &Vec<Range>,
opts: &Options) -> int {
let mut out = BufferedWriter::new(std::io::stdio::stdout_raw());
// `use_delim` records whether an output delimiter was requested at all;
// the empty string in the `None` arm is never written.
let (use_delim, out_delim) = match opts.out_delim.clone() {
Some(delim) => (true, delim),
None => (false, "".to_str())
};
// Per-line state, reset whenever a newline is seen.
let mut char_pos = 0;
let mut print_delim = false;
let mut range_pos = 0;
loop {
let character = match reader.read_char() {
Ok(character) => character,
Err(std::io::IoError{ kind: std::io::EndOfFile, ..}) => {
// Input ended in the middle of a line: terminate the output line.
if char_pos > 0 {
out.write_u8('\n' as u8).unwrap();
}
break
}
Err(std::io::IoError{ kind: std::io::InvalidInput, ..}) => {
fail!("Invalid utf8");
}
_ => fail!(),
};
if character == '\n' {
// Newlines are always passed through, regardless of the ranges.
out.write_u8('\n' as u8).unwrap();
char_pos = 0;
print_delim = false;
range_pos = 0;
} else {
char_pos += 1;
// Move on to the next range once the current one has been passed.
// NOTE(review): `range_pos` is advanced without a bounds check here, so
// this assumes some range always covers or follows `char_pos` (e.g. a
// trailing sentinel range) - TODO confirm.
if char_pos > ranges.get(range_pos).high {
range_pos += 1;
}
let cur_range = *ranges.get(range_pos);
if char_pos >= cur_range.low {
if use_delim {
// Only emit the delimiter at the start of a range, and only after
// something has already been printed on this line.
if print_delim && char_pos == cur_range.low {
out.write_str(out_delim.as_slice()).unwrap();
}
print_delim = true;
}
out.write_char(character).unwrap();
}
}
}
0
}
fn cut_fields(files: Vec<String>, ranges: Vec<Range>,
opts: FieldOptions) -> int {
fn cut_fields<T: Reader>(reader: BufferedReader<T>,
ranges: &Vec<Range>,
opts: &FieldOptions) -> int {
for range in ranges.iter() {
println!("{}-{}", range.low, range.high);
}
return 0;
0
}
/// Runs the cut operation selected by `mode` over every entry in `filenames`
/// and returns the bitwise OR of the individual results.
///
/// * `filenames` - inputs to process in order; the name "-" selects stdin.
///   When the list is empty, "-" is used as the only input.
/// * `mode`      - which cutter to run (`Bytes`, `Characters` or `Fields`),
///   together with its ranges and options.
///
/// Files that do not exist or cannot be opened are reported with
/// `show_error!` and skipped.
fn cut_files(mut filenames: Vec<String>, mode: Mode) -> int {
// Set once stdin has been processed, so repeated "-" arguments are ignored.
let mut stdin_read = false;
let mut exit_code = 0;
if filenames.len() == 0 { filenames.push("-".to_str()); }
for filename in filenames.iter() {
if filename.as_slice() == "-" {
if stdin_read { continue; }
exit_code |= match mode {
Bytes(ref ranges, ref opts) => {
cut_bytes(stdin(), ranges, opts)
}
Characters(ref ranges, ref opts) => {
cut_characters(stdin(), ranges, opts)
}
Fields(ref ranges, ref opts) => {
cut_fields(stdin(), ranges, opts)
}
};
stdin_read = true;
} else {
let path = Path::new(filename.as_slice());
// NOTE(review): `exit_code` is left unchanged on this path and on the
// open-failure path below, so a missing file does not by itself produce
// a non-zero result from this function - confirm this is intended.
if ! path.exists() {
show_error!("{}: No such file or directory", filename);
continue;
}
let buf_file = match File::open(&path) {
Ok(file) => BufferedReader::new(file),
Err(e) => {
show_error!("{0:s}: {1:s}", filename.as_slice(),
e.desc.to_str());
continue
}
};
exit_code |= match mode {
Bytes(ref ranges, ref opts) => cut_bytes(buf_file, ranges, opts),
Characters(ref ranges, ref opts) => {
cut_characters(buf_file, ranges, opts)
}
Fields(ref ranges, ref opts) => {
cut_fields(buf_file, ranges, opts)
}
};
}
}
exit_code
}
#[allow(dead_code)]
@ -145,10 +262,10 @@ pub fn uumain(args: Vec<String>) -> int {
optflag("", "version", "output version information and exit"),
];
let mut matches = match getopts(args.tail(), opts) {
let matches = match getopts(args.tail(), opts) {
Ok(m) => m,
Err(f) => {
show_error!(1, "Invalid options\n{}", f.to_err_msg())
show_error!("Invalid options\n{}", f.to_err_msg())
return 1;
}
};
@ -179,107 +296,53 @@ pub fn uumain(args: Vec<String>) -> int {
}
let complement = matches.opt_present("complement");
let mut out_delim = matches.opt_str("output-delimiter");
let mode = match (matches.opt_str("bytes"), matches.opt_str("characters"),
let mode_parse = match (matches.opt_str("bytes"),
matches.opt_str("characters"),
matches.opt_str("fields")) {
(Some(byte_ranges), None, None) => {
match list_to_ranges(byte_ranges.as_slice(), complement) {
Ok(ranges) => Bytes(ranges, Options{ out_delim: out_delim }),
Err(msg) => {
show_error!(1, "{}", msg);
return 1;
}
}
list_to_ranges(byte_ranges.as_slice(), complement).map(|ranges|
Bytes(ranges,
Options{ out_delim: matches.opt_str("output-delimiter") })
)
}
(None ,Some(char_ranges), None) => {
match list_to_ranges(char_ranges.as_slice(), complement) {
Ok(ranges) => Characters(ranges,
Options{ out_delim: out_delim }),
Err(msg) => {
show_error!(1, "{}", msg);
return 1;
}
}
list_to_ranges(char_ranges.as_slice(), complement).map(|ranges|
Characters(ranges,
Options{ out_delim: matches.opt_str("output-delimiter") })
)
}
(None, None ,Some(field_ranges)) => {
match list_to_ranges(field_ranges.as_slice(), complement) {
Ok(ranges) => {
list_to_ranges(field_ranges.as_slice(), complement).map(|ranges|
{
use std::str::from_char;
let only_delimited = matches.opt_present("only-delimited");
let delim = matches.opt_str("delimiter")
.filtered(|s| s.len() == 1)
.map(|s| s.as_slice().char_at(0))
.unwrap_or('\t');
if out_delim.is_none() {
out_delim = Some(from_char(delim));
}
let out_delim = matches.opt_str("output-delimiter")
.unwrap_or(from_char(delim));
let only_delimited = matches.opt_present("only-delimited");
Fields(ranges,
FieldOptions{ delimiter: delim,
out_delimeter: out_delim.unwrap(),
out_delimeter: out_delim,
only_delimited: only_delimited })
}
Err(msg) => {
show_error!(1, "{}", msg);
return 1;
}
}
)
}
(ref b, ref c, ref f) if b.is_some() || c.is_some() || f.is_some() => {
crash!(1, "only one type of list may be specified");
Err("only one type of list may be specified".to_str())
}
_ => crash!(1, "you must specify a list of bytes, characters, or fields")
_ => Err("you must specify a list of bytes, characters, or fields".to_str())
};
match mode {
Bytes(..) | Characters(..) => {
if matches.opt_present("delimiter") {
show_error!(1, "an input delimiter may be specified only when operating on fields");
return 1;
match mode_parse {
Ok(mode) => cut_files(matches.free, mode),
Err(err_msg) => {
show_error!("{}", err_msg);
1
}
if matches.opt_present("only-delimited") {
show_error!(1, "suppressing non-delimited lines makes sense only when operating on fields");
return 1;
}
}
_ => ()
}
for filename in matches.free.iter() {
if ! (filename.as_slice() == "-" ||
Path::new(filename.as_slice()).exists()) {
show_error!(1, "{}: No such file or directory", filename);
return 1;
}
}
if matches.free.len() == 0 { matches.free.push("-".to_string()); }
match mode {
Bytes(ranges, opts) => return cut_bytes(matches.free, ranges, opts),
Characters(ranges, opts) => return cut_charachters(matches.free,
ranges, opts),
Fields(ranges, opts) => return cut_fields(matches.free, ranges, opts),
}
}
fn open(path: &String) -> Option<BufferedReader<Box<Reader>>> {
if "-" == path.as_slice() {
let reader = box stdin() as Box<Reader>;
return Some(BufferedReader::new(reader));
}
match File::open(&std::path::Path::new(path.as_slice())) {
Ok(fd) => {
let reader = box fd as Box<Reader>;
return Some(BufferedReader::new(reader));
},
Err(e) => {
show_error!(1, "{0:s}: {1:s}", *path, e.desc.to_str());
}
}
None
}

View file

@ -18,6 +18,7 @@ extern crate chroot;
extern crate cksum;
extern crate comm;
extern crate cp;
extern crate cut;
extern crate dirname;
extern crate du;
extern crate echo;
@ -80,6 +81,7 @@ fn util_map() -> HashMap<&str, fn(Vec<String>) -> int> {
map.insert("cksum", cksum::uumain);
map.insert("comm", comm::uumain);
map.insert("cp", cp::uumain);
map.insert("cut", cut::uumain);
map.insert("dirname", dirname::uumain);
map.insert("du", du::uumain);
map.insert("echo", echo::uumain);