From d7b2372c3aba35683793d45c838dd9d163dc20ed Mon Sep 17 00:00:00 2001 From: Michael Gehring Date: Wed, 9 Jul 2014 10:29:50 +0200 Subject: [PATCH 1/9] ToStr::to_str -> ToString::to_string --- base64/base64.rs | 12 ++++++------ cat/cat.rs | 2 +- common/util.rs | 8 ++++---- cp/cp.rs | 14 +++++++------- cut/cut.rs | 14 +++++++------- kill/kill.rs | 4 ++-- mkdir/mkdir.rs | 2 +- mkuutils.rs | 2 +- nl/nl.rs | 4 ++-- paste/paste.rs | 6 +++--- rm/rm.rs | 6 +++--- rmdir/rmdir.rs | 4 ++-- seq/seq.rs | 4 ++-- tac/tac.rs | 2 +- tee/tee.rs | 2 +- truncate/truncate.rs | 10 +++++----- uptime/uptime.rs | 2 +- wc/wc.rs | 4 ++-- 18 files changed, 51 insertions(+), 51 deletions(-) diff --git a/base64/base64.rs b/base64/base64.rs index fe20cfeb1..36e9ec796 100644 --- a/base64/base64.rs +++ b/base64/base64.rs @@ -92,9 +92,9 @@ pub fn uumain(args: Vec) -> int { } fn decode(input: &mut Reader, ignore_garbage: bool) { - let mut to_decode = match input.read_to_str() { + let mut to_decode = match input.read_to_string() { Ok(m) => m, - Err(f) => fail!(f.to_str()) + Err(f) => fail!(f.to_string()) }; to_decode = str::replace(to_decode.as_slice(), "\n", ""); @@ -112,15 +112,15 @@ fn decode(input: &mut Reader, ignore_garbage: bool) { match out.write(bytes.as_slice()) { Ok(_) => {} - Err(f) => { crash!(1, "{}", f.to_str()); } + Err(f) => { crash!(1, "{}", f.to_string()); } } match out.flush() { Ok(_) => {} - Err(f) => { crash!(1, "{}", f.to_str()); } + Err(f) => { crash!(1, "{}", f.to_string()); } } } Err(s) => { - error!("error: {}", s.to_str()); + error!("error: {}", s.to_string()); fail!() } } @@ -137,7 +137,7 @@ fn encode(input: &mut Reader, line_wrap: uint) { }; let to_encode = match input.read_to_end() { Ok(m) => m, - Err(f) => fail!(f.to_str()) + Err(f) => fail!(f.to_string()) }; let encoded = to_encode.as_slice().to_base64(b64_conf); diff --git a/cat/cat.rs b/cat/cat.rs index 01395bf55..26a81c6f4 100644 --- a/cat/cat.rs +++ b/cat/cat.rs @@ -284,7 +284,7 @@ fn open(path: &str) -> Option<(Box, bool)> { match File::open(&std::path::Path::new(path)) { Ok(f) => Some((box f as Box, false)), Err(e) => { - (writeln!(stderr(), "cat: {0:s}: {1:s}", path, e.to_str())).unwrap(); + (writeln!(stderr(), "cat: {0:s}: {1:s}", path, e.to_string())).unwrap(); None }, } diff --git a/common/util.rs b/common/util.rs index ffd10be80..90deafb80 100644 --- a/common/util.rs +++ b/common/util.rs @@ -59,7 +59,7 @@ macro_rules! crash_if_err( ($exitcode:expr, $exp:expr) => ( match $exp { Ok(m) => m, - Err(f) => crash!($exitcode, "{}", f.to_str()) + Err(f) => crash!($exitcode, "{}", f.to_string()) } ) ) @@ -69,7 +69,7 @@ macro_rules! safe_write( ($fd:expr, $($args:expr),+) => ( match write!($fd, $($args),+) { Ok(_) => {} - Err(f) => { fail!(f.to_str()); } + Err(f) => { fail!(f.to_string()); } } ) ) @@ -79,7 +79,7 @@ macro_rules! safe_writeln( ($fd:expr, $($args:expr),+) => ( match writeln!($fd, $($args),+) { Ok(_) => {} - Err(f) => { fail!(f.to_str()); } + Err(f) => { fail!(f.to_string()); } } ) ) @@ -89,7 +89,7 @@ macro_rules! safe_unwrap( ($exp:expr) => ( match $exp { Ok(m) => m, - Err(f) => crash!(1, "{}", f.to_str()) + Err(f) => crash!(1, "{}", f.to_string()) } ) ) diff --git a/cp/cp.rs b/cp/cp.rs index 52912feac..43848adc6 100644 --- a/cp/cp.rs +++ b/cp/cp.rs @@ -102,15 +102,15 @@ fn copy(matches: getopts::Matches) { Err(e) => if e.kind == io::FileNotFound { false } else { - error!("error: {:s}", e.to_str()); + error!("error: {:s}", e.to_string()); fail!() } }; if same_file { error!("error: \"{:s}\" and \"{:s}\" are the same file", - source.display().to_str(), - dest.display().to_str()); + source.display().to_string(), + dest.display().to_string()); fail!(); } @@ -118,7 +118,7 @@ fn copy(matches: getopts::Matches) { if io_result.is_err() { let err = io_result.unwrap_err(); - error!("error: {:s}", err.to_str()); + error!("error: {:s}", err.to_string()); fail!(); } } else { @@ -129,7 +129,7 @@ fn copy(matches: getopts::Matches) { for source in sources.iter() { if fs::stat(source).unwrap().kind != io::TypeFile { - error!("error: \"{:s}\" is not a file", source.display().to_str()); + error!("error: \"{:s}\" is not a file", source.display().to_string()); continue; } @@ -137,13 +137,13 @@ fn copy(matches: getopts::Matches) { full_dest.push(source.filename_str().unwrap()); - println!("{:s}", full_dest.display().to_str()); + println!("{:s}", full_dest.display().to_string()); let io_result = fs::copy(source, &full_dest); if io_result.is_err() { let err = io_result.unwrap_err(); - error!("error: {:s}", err.to_str()); + error!("error: {:s}", err.to_string()); fail!() } } diff --git a/cut/cut.rs b/cut/cut.rs index 1addd7a04..d53e30f00 100644 --- a/cut/cut.rs +++ b/cut/cut.rs @@ -56,7 +56,7 @@ fn cut_bytes(mut reader: BufferedReader, let mut out = BufferedWriter::new(std::io::stdio::stdout_raw()); let (use_delim, out_delim) = match opts.out_delim.clone() { Some(delim) => (true, delim), - None => (false, "".to_str()) + None => (false, "".to_string()) }; 'newline: loop { @@ -106,7 +106,7 @@ fn cut_characters(mut reader: BufferedReader, let mut out = BufferedWriter::new(std::io::stdio::stdout_raw()); let (use_delim, out_delim) = match opts.out_delim.clone() { Some(delim) => (true, delim), - None => (false, "".to_str()) + None => (false, "".to_string()) }; 'newline: loop { @@ -363,7 +363,7 @@ fn cut_files(mut filenames: Vec, mode: Mode) -> int { let mut stdin_read = false; let mut exit_code = 0; - if filenames.len() == 0 { filenames.push("-".to_str()); } + if filenames.len() == 0 { filenames.push("-".to_string()); } for filename in filenames.iter() { if filename.as_slice() == "-" { @@ -486,7 +486,7 @@ pub fn uumain(args: Vec) -> int { match matches.opt_str("delimiter") { Some(delim) => { if delim.as_slice().char_len() != 1 { - Err("the delimiter must be a single character".to_str()) + Err("the delimiter must be a single character".to_string()) } else { Ok(Fields(ranges, FieldOptions { @@ -498,7 +498,7 @@ pub fn uumain(args: Vec) -> int { } None => Ok(Fields(ranges, FieldOptions { - delimiter: "\t".to_str(), + delimiter: "\t".to_string(), out_delimeter: out_delim, only_delimited: only_delimited })) @@ -507,9 +507,9 @@ pub fn uumain(args: Vec) -> int { ) } (ref b, ref c, ref f) if b.is_some() || c.is_some() || f.is_some() => { - Err("only one type of list may be specified".to_str()) + Err("only one type of list may be specified".to_string()) } - _ => Err("you must specify a list of bytes, characters, or fields".to_str()) + _ => Err("you must specify a list of bytes, characters, or fields".to_string()) }; match mode_parse { diff --git a/kill/kill.rs b/kill/kill.rs index 663d07718..ee575c3fa 100644 --- a/kill/kill.rs +++ b/kill/kill.rs @@ -125,7 +125,7 @@ fn print_signal(signal_name_or_value: &str) { if signal.name == signal_name_or_value || (format!("SIG{}", signal.name).as_slice()) == signal_name_or_value { println!("{}", signal.value) exit!(EXIT_OK as i32) - } else if signal_name_or_value == signal.value.to_str().as_slice() { + } else if signal_name_or_value == signal.value.to_string().as_slice() { println!("{}", signal.name); exit!(EXIT_OK as i32) } @@ -169,7 +169,7 @@ fn signal_by_name_or_value(signal_name_or_value: &str) -> Option { } for signal in ALL_SIGNALS.iter() { let long_name = format!("SIG{}", signal.name); - if signal.name == signal_name_or_value || (signal_name_or_value == signal.value.to_str().as_slice()) || (long_name.as_slice() == signal_name_or_value) { + if signal.name == signal_name_or_value || (signal_name_or_value == signal.value.to_string().as_slice()) || (long_name.as_slice() == signal_name_or_value) { return Some(signal.value); } } diff --git a/mkdir/mkdir.rs b/mkdir/mkdir.rs index 0ec032538..a0c1966de 100644 --- a/mkdir/mkdir.rs +++ b/mkdir/mkdir.rs @@ -157,7 +157,7 @@ fn mkdir(path: &Path, mode: FilePermission) { match fs::mkdir(path, mode) { Ok(_) => {}, Err(e) => { - crash!(1, "test {}", e.to_str()); + crash!(1, "test {}", e.to_string()); } } } diff --git a/mkuutils.rs b/mkuutils.rs index 67b7ca15c..78b35303b 100644 --- a/mkuutils.rs +++ b/mkuutils.rs @@ -39,7 +39,7 @@ fn main() { // XXX: this all just assumes that the IO works correctly let mut out = File::open_mode(&Path::new(outfile), Truncate, Write).unwrap(); let mut input = File::open(&Path::new("uutils/uutils.rs")).unwrap(); - let main = input.read_to_str().unwrap().replace("@CRATES@", crates.as_slice()).replace("@UTIL_MAP@", util_map.as_slice()); + let main = input.read_to_string().unwrap().replace("@CRATES@", crates.as_slice()).replace("@UTIL_MAP@", util_map.as_slice()); match out.write(main.as_bytes()) { Err(e) => fail!("{}", e), diff --git a/nl/nl.rs b/nl/nl.rs index 3c72e8bba..8e45bdae6 100644 --- a/nl/nl.rs +++ b/nl/nl.rs @@ -158,9 +158,9 @@ pub fn uumain(args: Vec) -> int { // nl implements the main functionality for an individual buffer. fn nl (reader: &mut BufferedReader, settings: &Settings) { let mut line_no = settings.starting_line_number; - // The current line number's width as a string. Using to_str is inefficient + // The current line number's width as a string. Using to_string is inefficient // but since we only do it once, it should not hurt. - let mut line_no_width = line_no.to_str().len(); + let mut line_no_width = line_no.to_string().len(); let line_no_width_initial = line_no_width; // Stores the smallest integer with one more digit than line_no, so that // when line_no >= line_no_threshold, we need to use one more digit. diff --git a/paste/paste.rs b/paste/paste.rs index fa3b6b353..0ee655a6e 100644 --- a/paste/paste.rs +++ b/paste/paste.rs @@ -66,7 +66,7 @@ fn paste(filenames: Vec, serial: bool, delimiters: &str) { } ) ).collect(); - let delimiters: Vec = delimiters.chars().map(|x| x.to_str()).collect(); + let delimiters: Vec = delimiters.chars().map(|x| x.to_string()).collect(); let mut delim_count = 0; if serial { for file in files.mut_iter() { @@ -80,7 +80,7 @@ fn paste(filenames: Vec, serial: bool, delimiters: &str) { Err(f) => if f.kind == io::EndOfFile { break } else { - crash!(1, "{}", f.to_str()) + crash!(1, "{}", f.to_string()) } } delim_count += 1; @@ -102,7 +102,7 @@ fn paste(filenames: Vec, serial: bool, delimiters: &str) { *eof.get_mut(i) = true; eof_count += 1; } else { - crash!(1, "{}", f.to_str()); + crash!(1, "{}", f.to_string()); } } } diff --git a/rm/rm.rs b/rm/rm.rs index 203cfc870..396bbb18d 100644 --- a/rm/rm.rs +++ b/rm/rm.rs @@ -136,7 +136,7 @@ fn remove(files: Vec, force: bool, interactive: InteractiveMode, one_fs: let walk_dir = match fs::walk_dir(&file) { Ok(m) => m, Err(f) => { - crash!(1, "{}", f.to_str()); + crash!(1, "{}", f.to_string()); } }; r = remove(walk_dir.map(|x| x.as_str().unwrap().to_string()).collect(), force, interactive, one_fs, preserve_root, recursive, dir, verbose).and(r); @@ -177,7 +177,7 @@ fn remove_dir(path: &Path, name: &str, interactive: InteractiveMode, verbose: bo match fs::rmdir(path) { Ok(_) => if verbose { println!("Removed '{}'", name); }, Err(f) => { - show_error!("{}", f.to_str()); + show_error!("{}", f.to_string()); return Err(1); } } @@ -197,7 +197,7 @@ fn remove_file(path: &Path, name: &str, interactive: InteractiveMode, verbose: b match fs::unlink(path) { Ok(_) => if verbose { println!("Removed '{}'", name); }, Err(f) => { - show_error!("{}", f.to_str()); + show_error!("{}", f.to_string()); return Err(1); } } diff --git a/rmdir/rmdir.rs b/rmdir/rmdir.rs index 5976ff66d..2f48c3472 100644 --- a/rmdir/rmdir.rs +++ b/rmdir/rmdir.rs @@ -90,7 +90,7 @@ fn remove_dir(path: &Path, dir: &str, ignore: bool, parents: bool, verbose: bool let mut walk_dir = match fs::walk_dir(path) { Ok(m) => m, Err(f) => { - show_error!("{}", f.to_str()); + show_error!("{}", f.to_string()); return Err(1); } }; @@ -111,7 +111,7 @@ fn remove_dir(path: &Path, dir: &str, ignore: bool, parents: bool, verbose: bool } } Err(f) => { - show_error!("{}", f.to_str()); + show_error!("{}", f.to_string()); r = Err(1); } } diff --git a/seq/seq.rs b/seq/seq.rs index 34ea1efd6..da33e9c2c 100644 --- a/seq/seq.rs +++ b/seq/seq.rs @@ -24,7 +24,7 @@ macro_rules! pipe_write( if f.kind == io::BrokenPipe { return } else { - fail!("{}", f.to_str()) + fail!("{}", f.to_string()) } } ) @@ -232,7 +232,7 @@ fn print_seq(first: f64, step: f64, last: f64, largest_dec: uint, separator: Str let mut i = 0i; let mut value = first + i as f64 * step; while !done_printing(value, step, last) { - let istr = value.to_str(); + let istr = value.to_string(); let ilen = istr.len(); let before_dec = istr.as_slice().find('.').unwrap_or(ilen); if pad && before_dec < padding { diff --git a/tac/tac.rs b/tac/tac.rs index 001bf1fea..670704e73 100644 --- a/tac/tac.rs +++ b/tac/tac.rs @@ -78,7 +78,7 @@ fn tac(filenames: Vec, before: bool, _: bool, separator: &str) { box crash_if_err!(1, io::File::open(&Path::new(filename))) as Box } ); - let mut data = crash_if_err!(1, file.read_to_str()); + let mut data = crash_if_err!(1, file.read_to_string()); if data.as_slice().ends_with("\n") { // removes blank line that is inserted otherwise let mut buf = data.into_string(); diff --git a/tee/tee.rs b/tee/tee.rs index b63eb868b..77ebbc598 100644 --- a/tee/tee.rs +++ b/tee/tee.rs @@ -144,7 +144,7 @@ impl Reader for NamedReader { fn with_path(path: &Path, cb: || -> IoResult) -> IoResult { match cb() { - Err(f) => { warn(format!("{}: {}", path.display(), f.to_str()).as_slice()); Err(f) } + Err(f) => { warn(format!("{}: {}", path.display(), f.to_string()).as_slice()); Err(f) } okay => okay } } diff --git a/truncate/truncate.rs b/truncate/truncate.rs index 1698b616f..141a97cbe 100644 --- a/truncate/truncate.rs +++ b/truncate/truncate.rs @@ -105,13 +105,13 @@ fn truncate(no_create: bool, _: bool, reference: Option, size: Option m, Err(f) => { - crash!(1, "{}", f.to_str()) + crash!(1, "{}", f.to_string()) } }; match fs::stat(rfile.path()) { Ok(stat) => (stat.size, Reference), Err(f) => { - show_error!("{}", f.to_str()); + show_error!("{}", f.to_string()); return Err(1); } } @@ -127,7 +127,7 @@ fn truncate(no_create: bool, _: bool, reference: Option, size: Option stat.size, Err(f) => { - show_warning!("{}", f.to_str()); + show_warning!("{}", f.to_string()); continue; } }; @@ -143,13 +143,13 @@ fn truncate(no_create: bool, _: bool, reference: Option, size: Option {} Err(f) => { - show_error!("{}", f.to_str()); + show_error!("{}", f.to_string()); return Err(1); } } } Err(f) => { - show_error!("{}", f.to_str()); + show_error!("{}", f.to_string()); return Err(1); } } diff --git a/uptime/uptime.rs b/uptime/uptime.rs index 774d46e78..cdd1b82c2 100644 --- a/uptime/uptime.rs +++ b/uptime/uptime.rs @@ -154,7 +154,7 @@ fn print_time() { fn get_uptime(boot_time: Option) -> i64 { let proc_uptime = File::open(&Path::new("/proc/uptime")) - .read_to_str(); + .read_to_string(); let uptime_text = match proc_uptime { Ok(s) => s, diff --git a/wc/wc.rs b/wc/wc.rs index a3f01ca3c..110369659 100644 --- a/wc/wc.rs +++ b/wc/wc.rs @@ -177,7 +177,7 @@ pub fn wc(files: Vec, matches: &Matches) -> StdResult<(), int> { } // used for formatting - max_str_len = total_byte_count.to_str().len(); + max_str_len = total_byte_count.to_string().len(); } for result in results.iter() { @@ -240,7 +240,7 @@ fn open(path: String) -> StdResult>, int> { Ok(BufferedReader::new(reader)) }, Err(e) => { - show_error!("wc: {0:s}: {1:s}", path, e.desc.to_str()); + show_error!("wc: {0:s}: {1:s}", path, e.desc.to_string()); Err(1) } } From f06d9fe1303c37adaf1c82dee8b49deb813d1e7a Mon Sep 17 00:00:00 2001 From: polyphemus Date: Sat, 5 Jul 2014 19:57:18 +0200 Subject: [PATCH 2/9] Rewrite cut_bytes(), more performant than GNU Creates BufReader in buffer.rs. BufReader uses a stack allocated buffer to read into and returns selected slices into the buffer. This does away with any dynamic allocations in the 'newline loop. 1.5 to 2.5 more performant than previous version. 1.5 to 2.0 times more performant than GNU. --- cut/buffer.rs | 134 +++++++++++++++++++++++++++++++++++++++++++++ cut/cut.rs | 149 ++++++++++++++++++++++++++++++-------------------- 2 files changed, 224 insertions(+), 59 deletions(-) create mode 100644 cut/buffer.rs diff --git a/cut/buffer.rs b/cut/buffer.rs new file mode 100644 index 000000000..9fe691c3e --- /dev/null +++ b/cut/buffer.rs @@ -0,0 +1,134 @@ +use std; +use std::io::{IoResult, IoError}; + +pub struct BufReader { + reader: R, + buffer: [u8, ..4096], + start: uint, + end: uint, // exclusive +} + +pub mod Bytes { + pub trait Select { + fn select<'a>(&'a mut self, bytes: uint) -> Selected<'a>; + } + + pub enum Selected<'a> { + NewlineFound(&'a [u8]), + Complete(&'a [u8]), + Partial(&'a [u8]), + EndOfFile, + } +} + +impl BufReader { + pub fn new(reader: R) -> BufReader { + let empty_buffer = unsafe { + std::mem::uninitialized::<[u8, ..4096]>() + }; + + BufReader { + reader: reader, + buffer: empty_buffer, + start: 0, + end: 0, + } + } + + fn read(&mut self) -> IoResult { + let buf_len = self.buffer.len(); + let buffer_fill = self.buffer.mut_slice(self.end, buf_len); + + match self.reader.read(buffer_fill) { + Ok(nread) => { + self.end += nread; + Ok(nread) + } + error => error + } + } + + #[inline] + fn maybe_fill_buf(&mut self) -> IoResult { + if self.end == self.start { + self.start = 0; + self.end = 0; + } + + if self.end <= 2048 { self.read() } else { Ok(0) } + } + + pub fn consume_line(&mut self) -> uint { + let mut bytes_consumed = 0; + + loop { + match self.maybe_fill_buf() { + Err(IoError { kind: std::io::EndOfFile, .. }) => (), + Err(err) => fail!("read error: {}", err.desc), + _ => () + } + + let buffer_used = self.end - self.start; + + if buffer_used == 0 { return bytes_consumed; } + + for idx in range(self.start, self.end) { + if self.buffer[idx] == b'\n' { + self.start = idx + 1; + return bytes_consumed + idx + 1; + } + } + + bytes_consumed += buffer_used; + + self.start = 0; + self.end = 0; + } + } +} + +impl Bytes::Select for BufReader { + fn select<'a>(&'a mut self, bytes: uint) -> Bytes::Selected<'a> { + match self.maybe_fill_buf() { + Err(IoError { kind: std::io::EndOfFile, .. }) => (), + Err(err) => fail!("read error: {}", err.desc), + _ => () + } + + let buffer_used = self.end - self.start; + + if buffer_used == 0 { return Bytes::EndOfFile; } + + let (complete, max_segment_len) = { + if bytes < buffer_used { + (true, bytes + 1) + } else { + (false, buffer_used) + } + }; + + for idx in range(self.start, self.start + max_segment_len) { + if self.buffer[idx] == b'\n' { + let segment = self.buffer.slice(self.start, idx + 1); + + self.start = idx + 1; + + return Bytes::NewlineFound(segment); + } + } + + if complete { + let segment = self.buffer.slice(self.start, + self.start + bytes); + + self.start += bytes; + Bytes::Complete(segment) + } else { + let segment = self.buffer.slice(self.start, self.end); + + self.start = 0; + self.end = 0; + Bytes::Partial(segment) + } + } +} diff --git a/cut/cut.rs b/cut/cut.rs index d53e30f00..dce917473 100644 --- a/cut/cut.rs +++ b/cut/cut.rs @@ -14,7 +14,7 @@ extern crate getopts; extern crate libc; -use std::io::{File, BufferedWriter, BufferedReader, stdin, print}; +use std::io::{stdio, File, BufferedWriter, BufferedReader, print}; use getopts::{optopt, optflag, getopts, usage}; use ranges::Range; @@ -22,6 +22,7 @@ use ranges::Range; #[path = "../common/util.rs"] mod util; mod ranges; +mod buffer; static NAME: &'static str = "cut"; static VERSION: &'static str = "1.0.0"; @@ -50,67 +51,94 @@ fn list_to_ranges(list: &str, complement: bool) -> Result, String> { } } -fn cut_bytes(mut reader: BufferedReader, +fn cut_bytes(reader: R, ranges: &Vec, opts: &Options) -> int { - let mut out = BufferedWriter::new(std::io::stdio::stdout_raw()); - let (use_delim, out_delim) = match opts.out_delim.clone() { - Some(delim) => (true, delim), - None => (false, "".to_string()) - }; + use buffer::Bytes::{Select, NewlineFound, Complete, Partial, EndOfFile}; + + let mut buf_read = buffer::BufReader::new(reader); + let mut out = BufferedWriter::new(stdio::stdout_raw()); 'newline: loop { - let line = match reader.read_until(b'\n') { - Ok(line) => line, - Err(std::io::IoError { kind: std::io::EndOfFile, .. }) => break, - _ => fail!(), - }; - - let line_len = line.len(); + let mut cur_pos = 1; let mut print_delim = false; for &Range { low: low, high: high } in ranges.iter() { - if low > line_len { break; } + // skip upto low + let orig_pos = cur_pos; + loop { + match buf_read.select(low - cur_pos) { + NewlineFound(_) => { + out.write(&[b'\n']).unwrap(); + continue 'newline + } + Complete(bytes) => { + cur_pos += bytes.len(); + break + } + Partial(bytes) => cur_pos += bytes.len(), + EndOfFile => { + if orig_pos != cur_pos { + out.write(&[b'\n']).unwrap(); + } - if use_delim { - if print_delim { - out.write_str(out_delim.as_slice()).unwrap(); + break 'newline + } } - print_delim = true; } - if high >= line_len { - let segment = line.slice(low - 1, line_len); - - out.write(segment).unwrap(); - - if *line.get(line_len - 1) == b'\n' { - continue 'newline + match opts.out_delim { + Some(ref delim) => { + if print_delim { + out.write(delim.as_bytes()).unwrap(); + } + print_delim = true; } - } else { - let segment = line.slice(low - 1, high); + None => () + } - out.write(segment).unwrap(); + // write out from low to high + loop { + match buf_read.select(high - cur_pos + 1) { + NewlineFound(bytes) => { + out.write(bytes).unwrap(); + continue 'newline + } + Complete(bytes) => { + out.write(bytes).unwrap(); + cur_pos = high + 1; + break + } + Partial(bytes) => { + cur_pos += bytes.len(); + out.write(bytes).unwrap(); + } + EndOfFile => { + if cur_pos != low || low == high { + out.write(&[b'\n']).unwrap(); + } + + break 'newline + } + } } } - out.write(&[b'\n']).unwrap(); + buf_read.consume_line(); + out.write([b'\n']).unwrap(); } 0 } -fn cut_characters(mut reader: BufferedReader, +fn cut_characters(reader: R, ranges: &Vec, opts: &Options) -> int { - let mut out = BufferedWriter::new(std::io::stdio::stdout_raw()); - let (use_delim, out_delim) = match opts.out_delim.clone() { - Some(delim) => (true, delim), - None => (false, "".to_string()) - }; + let mut buf_in = BufferedReader::new(reader); + let mut out = BufferedWriter::new(stdio::stdout_raw()); 'newline: loop { - let line = match reader.read_line() { + let line = match buf_in.read_line() { Ok(line) => line, Err(std::io::IoError { kind: std::io::EndOfFile, .. }) => break, _ => fail!(), @@ -126,11 +154,14 @@ fn cut_characters(mut reader: BufferedReader, None => break }; - if use_delim { - if print_delim { - out.write_str(out_delim.as_slice()).unwrap(); + match opts.out_delim { + Some(ref delim) => { + if print_delim { + out.write(delim.as_bytes()).unwrap(); + } + print_delim = true; } - print_delim = true; + None => () } match char_indices.nth(high - low) { @@ -204,15 +235,16 @@ impl<'a> Iterator<(uint, uint)> for Searcher<'a> { } } -fn cut_fields_delimiter(mut reader: BufferedReader, +fn cut_fields_delimiter(reader: R, ranges: &Vec, delim: &String, only_delimited: bool, out_delim: &String) -> int { - let mut out = BufferedWriter::new(std::io::stdio::stdout_raw()); + let mut buf_in = BufferedReader::new(reader); + let mut out = BufferedWriter::new(stdio::stdout_raw()); 'newline: loop { - let line = match reader.read_until(b'\n') { + let line = match buf_in.read_until(b'\n') { Ok(line) => line, Err(std::io::IoError { kind: std::io::EndOfFile, .. }) => break, _ => fail!(), @@ -279,7 +311,7 @@ fn cut_fields_delimiter(mut reader: BufferedReader, 0 } -fn cut_fields(mut reader: BufferedReader, +fn cut_fields(reader: R, ranges: &Vec, opts: &FieldOptions) -> int { match opts.out_delimeter { @@ -290,10 +322,11 @@ fn cut_fields(mut reader: BufferedReader, None => () } - let mut out = BufferedWriter::new(std::io::stdio::stdout_raw()); + let mut buf_in = BufferedReader::new(reader); + let mut out = BufferedWriter::new(stdio::stdout_raw()); 'newline: loop { - let line = match reader.read_until(b'\n') { + let line = match buf_in.read_until(b'\n') { Ok(line) => line, Err(std::io::IoError { kind: std::io::EndOfFile, .. }) => break, _ => fail!(), @@ -367,17 +400,17 @@ fn cut_files(mut filenames: Vec, mode: Mode) -> int { for filename in filenames.iter() { if filename.as_slice() == "-" { - if stdin_read { continue; } + if stdin_read { continue } exit_code |= match mode { Bytes(ref ranges, ref opts) => { - cut_bytes(stdin(), ranges, opts) + cut_bytes(stdio::stdin_raw(), ranges, opts) } Characters(ref ranges, ref opts) => { - cut_characters(stdin(), ranges, opts) + cut_characters(stdio::stdin_raw(), ranges, opts) } Fields(ref ranges, ref opts) => { - cut_fields(stdin(), ranges, opts) + cut_fields(stdio::stdin_raw(), ranges, opts) } }; @@ -387,11 +420,11 @@ fn cut_files(mut filenames: Vec, mode: Mode) -> int { if ! path.exists() { show_error!("{}: No such file or directory", filename); - continue; + continue } - let buf_file = match File::open(&path) { - Ok(file) => BufferedReader::new(file), + let file = match File::open(&path) { + Ok(f) => f, Err(e) => { show_error!("{}: {}", filename, e.desc); continue @@ -399,13 +432,11 @@ fn cut_files(mut filenames: Vec, mode: Mode) -> int { }; exit_code |= match mode { - Bytes(ref ranges, ref opts) => cut_bytes(buf_file, ranges, opts), + Bytes(ref ranges, ref opts) => cut_bytes(file, ranges, opts), Characters(ref ranges, ref opts) => { - cut_characters(buf_file, ranges, opts) - } - Fields(ref ranges, ref opts) => { - cut_fields(buf_file, ranges, opts) + cut_characters(file, ranges, opts) } + Fields(ref ranges, ref opts) => cut_fields(file, ranges, opts) }; } } From 304ba5f4dcd8edf16d92b63df730f71e920ad2fb Mon Sep 17 00:00:00 2001 From: Arcterus Date: Wed, 9 Jul 2014 18:19:59 -0700 Subject: [PATCH 3/9] Implement shuf --- Makefile | 1 + shuf/shuf.rs | 172 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 173 insertions(+) create mode 100644 shuf/shuf.rs diff --git a/Makefile b/Makefile index e6e5bec4d..453c08696 100644 --- a/Makefile +++ b/Makefile @@ -35,6 +35,7 @@ PROGS := \ rmdir \ sleep \ seq \ + shuf \ sum \ sync \ tac \ diff --git a/shuf/shuf.rs b/shuf/shuf.rs new file mode 100644 index 000000000..619b0e31d --- /dev/null +++ b/shuf/shuf.rs @@ -0,0 +1,172 @@ +#![crate_id(name="shuf", vers="1.0.0", author="Arcterus")] + +/* + * This file is part of the uutils coreutils package. + * + * (c) Arcterus + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +#![feature(macro_rules)] + +extern crate getopts; +extern crate libc; + +use std::cmp; +use std::from_str::from_str; +use std::io; +use std::io::IoResult; +use std::iter::{range_inclusive, RangeInclusive}; +use std::rand; +use std::uint; + +#[path = "../common/util.rs"] +mod util; + +enum Mode { + Default, + Echo, + InputRange(RangeInclusive) +} + +static NAME: &'static str = "shuf"; +static VERSION: &'static str = "0.0.1"; + +pub fn uumain(args: Vec) -> int { + let program = args.get(0).clone(); + + let opts = [ + getopts::optflag("e", "echo", "treat each ARG as an input line"), + getopts::optopt("i", "input-range", "treat each number LO through HI as an input line", "LO-HI"), + getopts::optopt("n", "head-count", "output at most COUNT lines", "COUNT"), + getopts::optopt("o", "output", "write result to FILE instead of standard output", "FILE"), + getopts::optopt("", "random-source", "get random bytes from FILE", "FILE"), + getopts::optflag("r", "repeat", "output lines can be repeated"), + getopts::optflag("z", "zero-terminated", "end lines with 0 byte, not newline"), + getopts::optflag("h", "help", "display this help and exit"), + getopts::optflag("V", "version", "output version information and exit") + ]; + let matches = match getopts::getopts(args.tail(), opts) { + Ok(m) => m, + Err(f) => { + crash!(1, "{}", f) + } + }; + if matches.opt_present("help") { + println!("{name} v{version} + +Usage: + {prog} [OPTION]... [FILE] + {prog} -e [OPTION]... [ARG]... + {prog} -i LO-HI [OPTION]...\n +{usage} +With no FILE, or when FILE is -, read standard input.", + name = NAME, version = VERSION, prog = program, + usage = getopts::usage("Write a random permutation of the input lines to standard output.", opts)); + } else if matches.opt_present("version") { + println!("{} v{}", NAME, VERSION); + } else { + let echo = matches.opt_present("echo"); + let mode = match matches.opt_str("input-range") { + Some(range) => { + if echo { + show_error!("cannot specify more than one mode"); + return 1; + } + match parse_range(range) { + Ok(m) => InputRange(m), + Err((msg, code)) => { + show_error!("{}", msg); + return code; + } + } + } + None => if echo { Echo } else { Default } + }; + let repeat = matches.opt_present("repeat"); + let zero = matches.opt_present("zero-terminated"); + let count = match matches.opt_str("head-count") { + Some(cnt) => match from_str::(cnt.as_slice()) { + Some(val) => val, + None => { + show_error!("'{}' is not a valid count", cnt); + return 1; + } + }, + None => uint::MAX + }; + let output = matches.opt_str("output"); + let random = matches.opt_str("random-source"); + match shuf(matches.free, mode, repeat, zero, count, output, random) { + Err(f) => { + show_error!("{}", f); + return 1; + }, + _ => {} + } + } + + 0 +} + +fn shuf(input: Vec, mode: Mode, repeat: bool, zero: bool, count: uint, output: Option, random: Option) -> IoResult<()> { + match mode { + Echo => shuf_lines(input, repeat, zero, count, output, random), + InputRange(range) => shuf_lines(range.map(|num| num.to_str()).collect(), repeat, zero, count, output, random), + Default => { + let lines: Vec = input.move_iter().flat_map(|filename| { + let mut file = io::BufferedReader::new(crash_if_err!(1, io::File::open(&Path::new(filename.as_slice())))); + let mut lines = vec!(); + for line in file.lines() { + let mut line = crash_if_err!(1, line); + line.pop_char(); + lines.push(line); + } + lines.move_iter() + }).collect(); + shuf_lines(lines, repeat, zero, count, output, random) + } + } +} + +fn shuf_lines(mut lines: Vec, repeat: bool, zero: bool, count: uint, outname: Option, random: Option) -> IoResult<()> { + let mut output = match outname { + Some(name) => box io::BufferedWriter::new(try!(io::File::create(&Path::new(name)))) as Box, + None => box io::stdout() as Box + }; + let mut rng = match random { + Some(name) => box rand::reader::ReaderRng::new(try!(io::File::open(&Path::new(name)))) as Box, + None => box rand::task_rng() as Box + }; + let mut len = lines.len(); + let max = if repeat { count } else { cmp::min(count, len) }; + for _ in range(0, max) { + let idx = rng.next_u32() as uint % len; + try!(write!(output, "{}{}", lines.get(idx), if zero { '\0' } else { '\n' })); + if !repeat { + lines.remove(idx); + len -= 1; + } + } + Ok(()) +} + +fn parse_range(input_range: String) -> Result, (String, int)> { + let split: Vec<&str> = input_range.as_slice().split('-').collect(); + if split.len() != 2 { + Err(("invalid range format".to_string(), 1)) + } else { + let begin = match from_str::(*split.get(0)) { + Some(m) => m, + None => return Err((format!("{} is not a valid number", split.get(0)), 1)) + }; + let end = match from_str::(*split.get(1)) { + Some(m) => m, + None => return Err((format!("{} is not a valid number", split.get(1)), 1)) + }; + Ok(range_inclusive(begin, end)) + } +} + From 16984762d0ecb8923b860e0e9b491e13dd79013f Mon Sep 17 00:00:00 2001 From: Arcterus Date: Wed, 9 Jul 2014 19:11:19 -0700 Subject: [PATCH 4/9] shuf: update for latest Rust --- shuf/shuf.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/shuf/shuf.rs b/shuf/shuf.rs index 619b0e31d..950243b5b 100644 --- a/shuf/shuf.rs +++ b/shuf/shuf.rs @@ -1,4 +1,4 @@ -#![crate_id(name="shuf", vers="1.0.0", author="Arcterus")] +#![crate_name = "shuf"] /* * This file is part of the uutils coreutils package. @@ -114,7 +114,7 @@ With no FILE, or when FILE is -, read standard input.", fn shuf(input: Vec, mode: Mode, repeat: bool, zero: bool, count: uint, output: Option, random: Option) -> IoResult<()> { match mode { Echo => shuf_lines(input, repeat, zero, count, output, random), - InputRange(range) => shuf_lines(range.map(|num| num.to_str()).collect(), repeat, zero, count, output, random), + InputRange(range) => shuf_lines(range.map(|num| num.to_string()).collect(), repeat, zero, count, output, random), Default => { let lines: Vec = input.move_iter().flat_map(|filename| { let mut file = io::BufferedReader::new(crash_if_err!(1, io::File::open(&Path::new(filename.as_slice())))); From ea333b8b7e2b69fc2bc1e19680fae8bc7cb526db Mon Sep 17 00:00:00 2001 From: Arcterus Date: Wed, 9 Jul 2014 19:30:38 -0700 Subject: [PATCH 5/9] shuf: handle stdin --- shuf/shuf.rs | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/shuf/shuf.rs b/shuf/shuf.rs index 950243b5b..62c31e046 100644 --- a/shuf/shuf.rs +++ b/shuf/shuf.rs @@ -48,7 +48,7 @@ pub fn uumain(args: Vec) -> int { getopts::optflag("h", "help", "display this help and exit"), getopts::optflag("V", "version", "output version information and exit") ]; - let matches = match getopts::getopts(args.tail(), opts) { + let mut matches = match getopts::getopts(args.tail(), opts) { Ok(m) => m, Err(f) => { crash!(1, "{}", f) @@ -83,7 +83,16 @@ With no FILE, or when FILE is -, read standard input.", } } } - None => if echo { Echo } else { Default } + None => { + if echo { + Echo + } else { + if matches.free.len() == 0 { + matches.free.push("-".to_string()); + } + Default + } + } }; let repeat = matches.opt_present("repeat"); let zero = matches.opt_present("zero-terminated"); @@ -117,7 +126,18 @@ fn shuf(input: Vec, mode: Mode, repeat: bool, zero: bool, count: uint, o InputRange(range) => shuf_lines(range.map(|num| num.to_string()).collect(), repeat, zero, count, output, random), Default => { let lines: Vec = input.move_iter().flat_map(|filename| { - let mut file = io::BufferedReader::new(crash_if_err!(1, io::File::open(&Path::new(filename.as_slice())))); + let slice = filename.as_slice(); + let mut file_buf; + let mut stdin_buf; + let mut file = io::BufferedReader::new( + if slice == "-" { + stdin_buf = io::stdio::stdin_raw(); + &mut stdin_buf as &mut Reader + } else { + file_buf = crash_if_err!(1, io::File::open(&Path::new(slice))); + &mut file_buf as &mut Reader + } + ); let mut lines = vec!(); for line in file.lines() { let mut line = crash_if_err!(1, line); From 0b76c70aaa85705f102747afc10e6e9d7bd863ce Mon Sep 17 00:00:00 2001 From: Arcterus Date: Wed, 9 Jul 2014 19:39:03 -0700 Subject: [PATCH 6/9] Add shuf to Cargo.toml and remove shuf from to-do list --- Cargo.toml | 4 ++++ README.md | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index eba27bd9a..653866415 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -151,6 +151,10 @@ path = "rmdir/rmdir.rs" name = "seq" path = "seq/seq.rs" +[[bin]] +name = "shuf" +path = "shuf/shuf.rs" + [[bin]] name = "sleep" path = "sleep/sleep.rs" diff --git a/README.md b/README.md index f1b0bf028..335496c63 100644 --- a/README.md +++ b/README.md @@ -153,7 +153,6 @@ To do - runcon - setuidgid - shred -- shuf - sort - split - stat From 8bc9b672cb76a3e0e2ecadc39b344e653ecee214 Mon Sep 17 00:00:00 2001 From: Arcterus Date: Wed, 9 Jul 2014 19:51:51 -0700 Subject: [PATCH 7/9] wc: stop buffering stdin twice --- wc/wc.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/wc/wc.rs b/wc/wc.rs index 110369659..bba4d8ae3 100644 --- a/wc/wc.rs +++ b/wc/wc.rs @@ -15,7 +15,7 @@ extern crate getopts; extern crate libc; use std::str::from_utf8; -use std::io::{print, stdin, File, BufferedReader}; +use std::io::{print, stdin_raw, File, BufferedReader}; use StdResult = std::result::Result; use getopts::Matches; @@ -87,6 +87,7 @@ static TAB: u8 = '\t' as u8; static SYN: u8 = 0x16 as u8; static FF: u8 = 0x0C as u8; +#[inline(always)] fn is_word_seperator(byte: u8) -> bool { byte == SPACE || byte == TAB || byte == CR || byte == SYN || byte == FF } @@ -230,7 +231,7 @@ fn print_stats(filename: &str, line_count: uint, word_count: uint, char_count: u fn open(path: String) -> StdResult>, int> { if "-" == path.as_slice() { - let reader = box stdin() as Box; + let reader = box stdin_raw() as Box; return Ok(BufferedReader::new(reader)); } From 9b44dc63114b0183896490a9135ccd5cf3c7dc13 Mon Sep 17 00:00:00 2001 From: Arcterus Date: Wed, 9 Jul 2014 23:12:46 -0700 Subject: [PATCH 8/9] kill: implement short signals (fixes #242) and bug fixes --- kill/kill.rs | 45 +++++++++++++++++++++++++++++++++++---------- kill/signals.rs | 5 +++++ 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/kill/kill.rs b/kill/kill.rs index ee575c3fa..fd3dd5df4 100644 --- a/kill/kill.rs +++ b/kill/kill.rs @@ -52,7 +52,6 @@ pub enum Mode { } pub fn uumain(args: Vec) -> int { - let opts = [ optflag("h", "help", "display this help and exit"), optflag("V", "version", "output version information and exit"), @@ -63,6 +62,7 @@ pub fn uumain(args: Vec) -> int { let usage = usage("[options] [...]", opts); + let (args, obs_signal) = handle_obsolete(args); let matches = match getopts(args.tail(), opts) { Ok(m) => m, @@ -72,7 +72,6 @@ pub fn uumain(args: Vec) -> int { }, }; - let mode = if matches.opt_present("version") { Version } else if matches.opt_present("help") { @@ -86,7 +85,7 @@ pub fn uumain(args: Vec) -> int { }; match mode { - Kill => kill(matches.opt_str("signal").unwrap_or("9".to_string()).as_slice(), matches.free), + Kill => return kill(matches.opt_str("signal").unwrap_or(obs_signal.unwrap_or("9".to_string())).as_slice(), matches.free), Table => table(), List => list(matches.opt_str("list")), Help => help(NAME, usage.as_slice()), @@ -100,8 +99,29 @@ fn version() { println!("{} {}", NAME, VERSION); } -fn table() { +fn handle_obsolete(mut args: Vec) -> (Vec, Option) { + let mut i = 0; + while i < args.len() { + // this is safe because slice is valid when it is referenced + let slice: &str = unsafe { std::mem::transmute(args.get(i).as_slice()) }; + if slice.char_at(0) == '-' && slice.len() > 1 && slice.char_at(1).is_digit() { + let val = slice.slice_from(1); + match from_str(val) { + Some(num) => { + if signals::is_signal(num) { + args.remove(i); + return (args, Some(val.to_string())); + } + } + None => break /* getopts will error out for us */ + } + } + i += 1; + } + (args, None) +} +fn table() { let mut name_width = 0; /* Compute the maximum width of a signal name. */ for s in ALL_SIGNALS.iter() { @@ -113,7 +133,7 @@ fn table() { for (idx, signal) in ALL_SIGNALS.iter().enumerate() { print!("{0: >#2} {1: <#8}", idx+1, signal.name); //TODO: obtain max signal width here - + if (idx+1) % 7 == 0 { println!(""); } @@ -144,7 +164,7 @@ fn print_signals() { } else { pos += 1; print!(" "); - } + } } } @@ -176,7 +196,8 @@ fn signal_by_name_or_value(signal_name_or_value: &str) -> Option { None } -fn kill(signalname: &str, pids: std::vec::Vec) { +fn kill(signalname: &str, pids: std::vec::Vec) -> int { + let mut status = 0; let optional_signal_value = signal_by_name_or_value(signalname); let signal_value = match optional_signal_value { Some(x) => x, @@ -187,11 +208,15 @@ fn kill(signalname: &str, pids: std::vec::Vec) { Some(x) => { let result = Process::kill(x, signal_value as int); match result { - Ok(_) => (), - Err(_) => () + Ok(_) => (), + Err(f) => { + show_error!("{}", f); + status = 1; + } }; }, - None => crash!(EXIT_ERR, "failed to parse argument {}", signalname) + None => crash!(EXIT_ERR, "failed to parse argument {}", pid) }; } + status } diff --git a/kill/signals.rs b/kill/signals.rs index 519f4ece8..70b7a55d1 100644 --- a/kill/signals.rs +++ b/kill/signals.rs @@ -138,3 +138,8 @@ pub static ALL_SIGNALS:[Signal<'static>, ..31] = [ Signal{ name: "USR1", value:30 }, Signal{ name: "USR2", value:31 }, ]; + +#[inline(always)] +pub fn is_signal(num: uint) -> bool { + num < ALL_SIGNALS.len() +} From 4eb4a1feffa52d78dd535faafebbead2c5da2ac2 Mon Sep 17 00:00:00 2001 From: Michael Gehring Date: Thu, 10 Jul 2014 08:50:14 +0200 Subject: [PATCH 9/9] wc: fix build --- wc/wc.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/wc/wc.rs b/wc/wc.rs index bba4d8ae3..6d6fa5547 100644 --- a/wc/wc.rs +++ b/wc/wc.rs @@ -15,7 +15,8 @@ extern crate getopts; extern crate libc; use std::str::from_utf8; -use std::io::{print, stdin_raw, File, BufferedReader}; +use std::io::{print, File, BufferedReader}; +use std::io::stdio::stdin_raw; use StdResult = std::result::Result; use getopts::Matches;