mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-07-29 12:07:46 +00:00
Improve cat performance
This commit is contained in:
parent
fce8606697
commit
6e518f5cf9
1 changed files with 264 additions and 94 deletions
358
cat/cat.rs
358
cat/cat.rs
|
@ -1,5 +1,4 @@
|
||||||
#![crate_id(name="cat", vers="1.0.0", author="Seldaek")]
|
#![crate_id(name="cat", vers="1.0.0", author="Seldaek")]
|
||||||
#![feature(managed_boxes)]
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This file is part of the uutils coreutils package.
|
* This file is part of the uutils coreutils package.
|
||||||
|
@ -16,24 +15,27 @@ extern crate getopts;
|
||||||
|
|
||||||
use std::os;
|
use std::os;
|
||||||
use std::io::{print, File};
|
use std::io::{print, File};
|
||||||
use std::io::stdio::{stdout_raw, stdin_raw};
|
use std::io::stdio::{stdout_raw, stdin_raw, stderr};
|
||||||
use std::io::{BufferedWriter};
|
use std::io::{IoResult};
|
||||||
|
use std::ptr::{copy_nonoverlapping_memory};
|
||||||
|
|
||||||
#[allow(dead_code)]
|
#[allow(dead_code)]
|
||||||
fn main() { uumain(os::args()); }
|
fn main() { uumain(os::args()); }
|
||||||
|
|
||||||
pub fn uumain(args: Vec<String>) {
|
pub fn uumain(args: Vec<String>) {
|
||||||
let program = args.get(0).as_slice();
|
let program = args.get(0).as_slice();
|
||||||
let opts = ~[
|
let opts = [
|
||||||
getopts::optflag("A", "show-all", "equivalent to -vET"),
|
getopts::optflag("A", "show-all", "equivalent to -vET"),
|
||||||
getopts::optflag("b", "number-nonblank", "number nonempty output lines, overrides -n"),
|
getopts::optflag("b", "number-nonblank",
|
||||||
|
"number nonempty output lines, overrides -n"),
|
||||||
getopts::optflag("e", "", "equivalent to -vE"),
|
getopts::optflag("e", "", "equivalent to -vE"),
|
||||||
getopts::optflag("E", "show-ends", "display $ at end of each line"),
|
getopts::optflag("E", "show-ends", "display $ at end of each line"),
|
||||||
getopts::optflag("n", "number", "number all output lines"),
|
getopts::optflag("n", "number", "number all output lines"),
|
||||||
getopts::optflag("s", "squeeze-blank", "suppress repeated empty output lines"),
|
getopts::optflag("s", "squeeze-blank", "suppress repeated empty output lines"),
|
||||||
getopts::optflag("t", "", "equivalent to -vT"),
|
getopts::optflag("t", "", "equivalent to -vT"),
|
||||||
getopts::optflag("T", "show-tabs", "display TAB characters as ^I"),
|
getopts::optflag("T", "show-tabs", "display TAB characters as ^I"),
|
||||||
getopts::optflag("v", "show-nonprinting", "use ^ and M- notation, except for LF (\\n) and TAB (\\t)"),
|
getopts::optflag("v", "show-nonprinting",
|
||||||
|
"use ^ and M- notation, except for LF (\\n) and TAB (\\t)"),
|
||||||
getopts::optflag("h", "help", "display this help and exit"),
|
getopts::optflag("h", "help", "display this help and exit"),
|
||||||
getopts::optflag("V", "version", "output version information and exit"),
|
getopts::optflag("V", "version", "output version information and exit"),
|
||||||
];
|
];
|
||||||
|
@ -47,7 +49,8 @@ pub fn uumain(args: Vec<String>) {
|
||||||
println!("Usage:");
|
println!("Usage:");
|
||||||
println!(" {0:s} [OPTION]... [FILE]...", program);
|
println!(" {0:s} [OPTION]... [FILE]...", program);
|
||||||
println!("");
|
println!("");
|
||||||
print(getopts::usage("Concatenate FILE(s), or standard input, to standard output.", opts).as_slice());
|
print(getopts::usage("Concatenate FILE(s), or standard input, to \
|
||||||
|
standard output.", opts).as_slice());
|
||||||
println!("");
|
println!("");
|
||||||
println!("With no FILE, or when FILE is -, read standard input.");
|
println!("With no FILE, or when FILE is -, read standard input.");
|
||||||
return;
|
return;
|
||||||
|
@ -58,133 +61,300 @@ pub fn uumain(args: Vec<String>) {
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut number_mode = NumberNone;
|
let mut number_mode = NumberNone;
|
||||||
if matches.opt_present("number") {
|
if matches.opt_present("n") {
|
||||||
number_mode = NumberAll;
|
number_mode = NumberAll;
|
||||||
}
|
}
|
||||||
if matches.opt_present("number-nonblank") {
|
if matches.opt_present("b") {
|
||||||
number_mode = NumberNonEmpty;
|
number_mode = NumberNonEmpty;
|
||||||
}
|
}
|
||||||
let show_nonprint = matches.opts_present(["show-nonprinting".to_string(), "show-all".to_string(), "t".to_string(), "e".to_string()]);
|
let show_nonprint = matches.opts_present(["A".to_string(), "e".to_string(),
|
||||||
let show_ends = matches.opts_present(["show-ends".to_string(), "show-all".to_string(), "e".to_string()]);
|
"t".to_string(), "v".to_string()]);
|
||||||
let show_tabs = matches.opts_present(["show-tabs".to_string(), "show-all".to_string(), "t".to_string()]);
|
let show_ends = matches.opts_present(["E".to_string(), "A".to_string(),
|
||||||
let squeeze_blank = matches.opt_present("squeeze-blank");
|
"e".to_string()]);
|
||||||
|
let show_tabs = matches.opts_present(["A".to_string(), "T".to_string(),
|
||||||
|
"t".to_string()]);
|
||||||
|
let squeeze_blank = matches.opt_present("s");
|
||||||
let mut files = matches.free;
|
let mut files = matches.free;
|
||||||
if files.is_empty() {
|
if files.is_empty() {
|
||||||
files = vec!("-".to_string());
|
files.push("-".to_string());
|
||||||
}
|
}
|
||||||
|
|
||||||
exec(files, number_mode, show_nonprint, show_ends, show_tabs, squeeze_blank);
|
exec(files, number_mode, show_nonprint, show_ends, show_tabs, squeeze_blank);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[deriving(Eq)]
|
#[deriving(Eq)]
|
||||||
pub enum NumberingMode {
|
enum NumberingMode {
|
||||||
NumberNone,
|
NumberNone,
|
||||||
NumberNonEmpty,
|
NumberNonEmpty,
|
||||||
NumberAll,
|
NumberAll,
|
||||||
}
|
}
|
||||||
|
|
||||||
static TAB: u8 = '\t' as u8;
|
fn write_lines(files: Vec<String>, number: NumberingMode, squeeze_blank: bool,
|
||||||
#[allow(dead_code)]
|
show_ends: bool) {
|
||||||
static CR: u8 = '\r' as u8;
|
|
||||||
static LF: u8 = '\n' as u8;
|
|
||||||
|
|
||||||
#[cfg(windows)]
|
let mut line_counter: uint = 1;
|
||||||
fn is_newline_char(byte: u8) -> bool {
|
|
||||||
byte == LF || byte == CR
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(unix)]
|
for path in files.iter() {
|
||||||
fn is_newline_char(byte: u8) -> bool {
|
let (mut reader, interactive) = match open(path.as_slice()) {
|
||||||
byte == LF
|
Some(f) => f,
|
||||||
}
|
None => continue,
|
||||||
|
};
|
||||||
|
|
||||||
pub fn exec(files: Vec<String>, number: NumberingMode, show_nonprint: bool, show_ends: bool, show_tabs: bool, squeeze_blank: bool) {
|
let mut in_buf = [0, .. 1024 * 31];
|
||||||
|
let mut out_buf = [0, .. 1024 * 64];
|
||||||
if NumberNone != number || show_nonprint || show_ends || show_tabs || squeeze_blank {
|
let mut writer = UnsafeWriter::new(out_buf.as_mut_slice(), stdout_raw());
|
||||||
let mut counter: uint = 1;
|
let mut at_line_start = true;
|
||||||
let is_numbering = number == NumberAll || number == NumberNonEmpty;
|
loop {
|
||||||
|
let n = match reader.read(in_buf) {
|
||||||
for path in files.iter() {
|
Ok(n) if n != 0 => n,
|
||||||
let mut reader = match open(path.as_slice()) {
|
_ => break,
|
||||||
Some(f) => f,
|
|
||||||
None => { continue }
|
|
||||||
};
|
};
|
||||||
|
let in_buf = in_buf.slice_to(n);
|
||||||
let mut writer = BufferedWriter::with_capacity(1024 * 8, stdout_raw());
|
let mut buf_pos = range(0, n);
|
||||||
let mut at_line_start = true;
|
|
||||||
let mut buf = ~[0, .. 1024 * 4];
|
|
||||||
loop {
|
loop {
|
||||||
// reading from a TTY seems to raise a condition on
|
writer.possibly_flush();
|
||||||
// EOF, rather than return Some(0) like a file.
|
let pos = match buf_pos.next() {
|
||||||
match reader.read(buf) {
|
Some(p) => p,
|
||||||
Ok(n) if n != 0 => {
|
None => break,
|
||||||
for &byte in buf.slice_to(n).iter() {
|
};
|
||||||
if at_line_start && (number == NumberAll || (number == NumberNonEmpty && !is_newline_char(byte))) {
|
if in_buf[pos] == '\n' as u8 {
|
||||||
(write!(&mut writer as &mut Writer, "{0:6u}\t", counter)).unwrap();
|
if !at_line_start || !squeeze_blank {
|
||||||
counter += 1;
|
if at_line_start && number == NumberAll {
|
||||||
at_line_start = false;
|
(write!(&mut writer, "{0:6u}\t", line_counter)).unwrap();
|
||||||
}
|
line_counter += 1;
|
||||||
if is_numbering && byte == LF {
|
|
||||||
at_line_start = true;
|
|
||||||
}
|
|
||||||
if show_tabs && byte == TAB {
|
|
||||||
writer.write(bytes!("^I")).unwrap();
|
|
||||||
} else if show_ends && byte == LF {
|
|
||||||
writer.write(bytes!("$\n")).unwrap();
|
|
||||||
} else if show_nonprint && (byte < 32 || byte >= 127) && !is_newline_char(byte) {
|
|
||||||
let mut byte = byte;
|
|
||||||
if byte >= 128 {
|
|
||||||
writer.write(bytes!("M-")).unwrap();
|
|
||||||
byte = byte - 128;
|
|
||||||
}
|
|
||||||
if byte < 32 {
|
|
||||||
writer.write(['^' as u8, byte + 64]).unwrap();
|
|
||||||
} else if byte == 127 {
|
|
||||||
writer.write(['^' as u8, byte - 64]).unwrap();
|
|
||||||
} else {
|
|
||||||
writer.write_u8(byte).unwrap();
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
writer.write_u8(byte).unwrap();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
},
|
if show_ends {
|
||||||
_ => break
|
writer.write_u8('$' as u8).unwrap();
|
||||||
|
}
|
||||||
|
writer.write_u8('\n' as u8).unwrap();
|
||||||
|
if interactive {
|
||||||
|
writer.flush().unwrap();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
at_line_start = true;
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
|
if at_line_start && number != NumberNone {
|
||||||
|
(write!(&mut writer, "{0:6u}\t", line_counter)).unwrap();
|
||||||
|
line_counter += 1;
|
||||||
|
}
|
||||||
|
match in_buf.tailn(pos).iter().position(|c| *c == '\n' as u8) {
|
||||||
|
Some(p) => {
|
||||||
|
writer.write(in_buf.slice(pos, pos + p)).unwrap();
|
||||||
|
if show_ends {
|
||||||
|
writer.write_u8('$' as u8).unwrap();
|
||||||
|
}
|
||||||
|
writer.write_u8('\n' as u8).unwrap();
|
||||||
|
if interactive {
|
||||||
|
writer.flush().unwrap();
|
||||||
|
}
|
||||||
|
buf_pos = range(pos + p + 1, n);
|
||||||
|
at_line_start = true;
|
||||||
|
},
|
||||||
|
None => {
|
||||||
|
writer.write(in_buf.tailn(pos)).unwrap();
|
||||||
|
at_line_start = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write_bytes(files: Vec<String>, number: NumberingMode, squeeze_blank: bool,
|
||||||
|
show_ends: bool, show_nonprint: bool, show_tabs: bool) {
|
||||||
|
|
||||||
|
let mut line_counter: uint = 1;
|
||||||
|
|
||||||
let mut writer = stdout_raw();
|
|
||||||
let mut buf = ~[0, .. 1024 * 64];
|
|
||||||
// passthru mode
|
|
||||||
for path in files.iter() {
|
for path in files.iter() {
|
||||||
let mut reader = match open(path.as_slice()) {
|
let (mut reader, interactive) = match open(path.as_slice()) {
|
||||||
Some(f) => f,
|
Some(f) => f,
|
||||||
None => { continue }
|
None => continue,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Flush all 1024 iterations.
|
||||||
|
let mut flush_counter = range(0, 1024);
|
||||||
|
|
||||||
|
let mut in_buf = [0, .. 1024 * 32];
|
||||||
|
let mut out_buf = [0, .. 1024 * 64];
|
||||||
|
let mut writer = UnsafeWriter::new(out_buf.as_mut_slice(), stdout_raw());
|
||||||
|
let mut at_line_start = true;
|
||||||
|
loop {
|
||||||
|
let n = match reader.read(in_buf) {
|
||||||
|
Ok(n) if n != 0 => n,
|
||||||
|
_ => break,
|
||||||
|
};
|
||||||
|
for &byte in in_buf.slice_to(n).iter() {
|
||||||
|
if flush_counter.next().is_none() {
|
||||||
|
writer.possibly_flush();
|
||||||
|
flush_counter = range(0, 1024);
|
||||||
|
}
|
||||||
|
if byte == '\n' as u8 {
|
||||||
|
if !at_line_start || !squeeze_blank {
|
||||||
|
if at_line_start && number == NumberAll {
|
||||||
|
(write!(&mut writer, "{0:6u}\t", line_counter)).unwrap();
|
||||||
|
line_counter += 1;
|
||||||
|
}
|
||||||
|
if show_ends {
|
||||||
|
writer.write_u8('$' as u8).unwrap();
|
||||||
|
}
|
||||||
|
writer.write_u8('\n' as u8).unwrap();
|
||||||
|
if interactive {
|
||||||
|
writer.flush().unwrap();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
at_line_start = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if at_line_start && number != NumberNone {
|
||||||
|
(write!(&mut writer, "{0:6u}\t", line_counter)).unwrap();
|
||||||
|
line_counter += 1;
|
||||||
|
at_line_start = false;
|
||||||
|
}
|
||||||
|
// This code is slow because of the many branches. cat in glibc avoids
|
||||||
|
// this by having the whole loop inside show_nonprint.
|
||||||
|
if byte == '\t' as u8 {
|
||||||
|
if show_tabs {
|
||||||
|
writer.write_str("^I")
|
||||||
|
} else {
|
||||||
|
writer.write_u8(byte)
|
||||||
|
}
|
||||||
|
} else if show_nonprint {
|
||||||
|
let byte = match byte {
|
||||||
|
128 .. 255 => {
|
||||||
|
writer.write_str("M-").unwrap();
|
||||||
|
byte - 128
|
||||||
|
},
|
||||||
|
_ => byte,
|
||||||
|
};
|
||||||
|
match byte {
|
||||||
|
0 .. 31 => writer.write(['^' as u8, byte + 64]),
|
||||||
|
127 => writer.write(['^' as u8, byte - 64]),
|
||||||
|
_ => writer.write_u8(byte),
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
writer.write_u8(byte)
|
||||||
|
}.unwrap();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write_fast(files: Vec<String>) {
|
||||||
|
let mut writer = stdout_raw();
|
||||||
|
let mut in_buf = [0, .. 1024 * 64];
|
||||||
|
|
||||||
|
for path in files.iter() {
|
||||||
|
let (mut reader, _) = match open(path.as_slice()) {
|
||||||
|
Some(x) => x,
|
||||||
|
None => continue,
|
||||||
};
|
};
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
// reading from a TTY seems to raise a condition on EOF,
|
match reader.read(in_buf) {
|
||||||
// rather than return Some(0) like a file.
|
|
||||||
match reader.read(buf) {
|
|
||||||
Ok(n) if n != 0 => {
|
Ok(n) if n != 0 => {
|
||||||
writer.write(buf.slice_to(n)).unwrap();
|
// This interface is completely broken.
|
||||||
}, _ => break
|
writer.write(in_buf.slice_to(n)).unwrap();
|
||||||
|
},
|
||||||
|
_ => break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn open(path: &str) -> Option<Box<Reader>> {
|
fn exec(files: Vec<String>, number: NumberingMode, show_nonprint: bool,
|
||||||
if "-" == path {
|
show_ends: bool, show_tabs: bool, squeeze_blank: bool) {
|
||||||
return Some(box stdin_raw() as Box<Reader>);
|
|
||||||
}
|
|
||||||
|
|
||||||
match File::open(&std::path::Path::new(path.as_slice())) {
|
if show_nonprint || show_tabs {
|
||||||
Ok(fd) => return Some(box fd as Box<Reader>),
|
write_bytes(files, number, squeeze_blank, show_ends, show_nonprint, show_tabs);
|
||||||
Err(e) => fail!("cat: {0:s}: {1:s}", path, e.to_str())
|
} else if number != NumberNone || squeeze_blank || show_ends {
|
||||||
|
write_lines(files, number, squeeze_blank, show_ends);
|
||||||
|
} else {
|
||||||
|
write_fast(files);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn open(path: &str) -> Option<(Box<Reader>, bool)> {
|
||||||
|
if path == "-" {
|
||||||
|
let stdin = stdin_raw();
|
||||||
|
let interactive = stdin.isatty();
|
||||||
|
return Some((box stdin as Box<Reader>, interactive));
|
||||||
|
}
|
||||||
|
|
||||||
|
match File::open(&std::path::Path::new(path)) {
|
||||||
|
Ok(f) => return Some((box f as Box<Reader>, false)),
|
||||||
|
Err(e) => {
|
||||||
|
(writeln!(stderr(), "cat: {0:s}: {1:s}", path, e.to_str())).unwrap();
|
||||||
|
return None;
|
||||||
|
},
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
struct UnsafeWriter<'a, W> {
|
||||||
|
inner: W,
|
||||||
|
buf: &'a mut [u8],
|
||||||
|
pos: uint,
|
||||||
|
threshold: uint,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, W: Writer> UnsafeWriter<'a, W> {
|
||||||
|
fn new(buf: &'a mut [u8], inner: W) -> UnsafeWriter<'a, W> {
|
||||||
|
let threshold = buf.len()/2;
|
||||||
|
UnsafeWriter {
|
||||||
|
inner: inner,
|
||||||
|
buf: buf,
|
||||||
|
pos: 0,
|
||||||
|
threshold: threshold,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn flush_buf(&mut self) -> IoResult<()> {
|
||||||
|
if self.pos != 0 {
|
||||||
|
let ret = self.inner.write(self.buf.slice_to(self.pos));
|
||||||
|
self.pos = 0;
|
||||||
|
ret
|
||||||
|
} else {
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn possibly_flush(&mut self) {
|
||||||
|
if self.pos > self.threshold {
|
||||||
|
self.inner.write(self.buf.slice_to(self.pos)).unwrap();
|
||||||
|
self.pos = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline(never)]
|
||||||
|
fn fail() -> ! {
|
||||||
|
fail!("assertion failed");
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, W: Writer> Writer for UnsafeWriter<'a, W> {
|
||||||
|
fn write(&mut self, buf: &[u8]) -> IoResult<()> {
|
||||||
|
let dst = self.buf.mut_slice_from(self.pos);
|
||||||
|
if buf.len() > dst.len() {
|
||||||
|
fail();
|
||||||
|
}
|
||||||
|
unsafe {
|
||||||
|
copy_nonoverlapping_memory(dst.as_mut_ptr(), buf.as_ptr(), buf.len())
|
||||||
|
}
|
||||||
|
self.pos += buf.len();
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn flush(&mut self) -> IoResult<()> {
|
||||||
|
self.flush_buf().and_then(|()| self.inner.flush())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[unsafe_destructor]
|
||||||
|
impl<'a, W: Writer> Drop for UnsafeWriter<'a, W> {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
let _ = self.flush_buf();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* vim: set ai ts=4 sw=4 sts=4 et : */
|
/* vim: set ai ts=4 sw=4 sts=4 et : */
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue