mirror of
https://github.com/RGBCube/uutils-coreutils
synced 2025-08-01 05:27:45 +00:00
wc: boost speed to be nearly as fast as GNU
This commit is contained in:
parent
32ef4d15b0
commit
9a281adc1e
1 changed file with 15 additions and 18 deletions
33
src/wc/wc.rs
33
src/wc/wc.rs
|
@ -82,12 +82,12 @@ pub fn uumain(args: Vec<String>) -> int {
|
||||||
0
|
0
|
||||||
}
|
}
|
||||||
|
|
||||||
static CR: u8 = '\r' as u8;
|
const CR: u8 = '\r' as u8;
|
||||||
static LF: u8 = '\n' as u8;
|
const LF: u8 = '\n' as u8;
|
||||||
static SPACE: u8 = ' ' as u8;
|
const SPACE: u8 = ' ' as u8;
|
||||||
static TAB: u8 = '\t' as u8;
|
const TAB: u8 = '\t' as u8;
|
||||||
static SYN: u8 = 0x16 as u8;
|
const SYN: u8 = 0x16 as u8;
|
||||||
static FF: u8 = 0x0C as u8;
|
const FF: u8 = 0x0C as u8;
|
||||||
|
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn is_word_seperator(byte: u8) -> bool {
|
fn is_word_seperator(byte: u8) -> bool {
|
||||||
|
@ -105,10 +105,7 @@ pub fn wc(files: Vec<String>, matches: &Matches) -> StdResult<(), int> {
|
||||||
let mut max_str_len: uint = 0;
|
let mut max_str_len: uint = 0;
|
||||||
|
|
||||||
for path in files.iter() {
|
for path in files.iter() {
|
||||||
let mut reader = match open(path.to_string()) {
|
let mut reader = try!(open(path.as_slice()));
|
||||||
Ok(f) => f,
|
|
||||||
Err(e) => { return Err(e); }
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut line_count: uint = 0;
|
let mut line_count: uint = 0;
|
||||||
let mut word_count: uint = 0;
|
let mut word_count: uint = 0;
|
||||||
|
@ -123,17 +120,17 @@ pub fn wc(files: Vec<String>, matches: &Matches) -> StdResult<(), int> {
|
||||||
match reader.read_until(LF) {
|
match reader.read_until(LF) {
|
||||||
Ok(raw_line) => {
|
Ok(raw_line) => {
|
||||||
// GNU 'wc' only counts lines that end in LF as lines
|
// GNU 'wc' only counts lines that end in LF as lines
|
||||||
if raw_line.iter().last().unwrap() == &LF {
|
if *raw_line.last().unwrap() == LF {
|
||||||
line_count += 1;
|
line_count += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
byte_count += raw_line.iter().len();
|
byte_count += raw_line.len();
|
||||||
|
|
||||||
// try and convert the bytes to UTF-8 first
|
// try and convert the bytes to UTF-8 first
|
||||||
match from_utf8(raw_line.as_slice()) {
|
match from_utf8(raw_line.as_slice()) {
|
||||||
Some(line) => {
|
Some(line) => {
|
||||||
word_count += line.words().count();
|
word_count += line.words().count();
|
||||||
current_char_count = line.chars().count();
|
current_char_count = line.char_len();
|
||||||
char_count += current_char_count;
|
char_count += current_char_count;
|
||||||
},
|
},
|
||||||
None => {
|
None => {
|
||||||
|
@ -151,7 +148,7 @@ pub fn wc(files: Vec<String>, matches: &Matches) -> StdResult<(), int> {
|
||||||
}
|
}
|
||||||
|
|
||||||
if current_char_count > longest_line_length {
|
if current_char_count > longest_line_length {
|
||||||
// we subtract one here because `line.iter().len()` includes the LF
|
// we subtract one here because `line.len()` includes the LF
|
||||||
// matches GNU 'wc' behaviour
|
// matches GNU 'wc' behaviour
|
||||||
longest_line_length = current_char_count - 1;
|
longest_line_length = current_char_count - 1;
|
||||||
}
|
}
|
||||||
|
@ -231,13 +228,13 @@ fn print_stats(filename: &str, line_count: uint, word_count: uint, char_count: u
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn open(path: String) -> StdResult<BufferedReader<Box<Reader+'static>>, int> {
|
fn open(path: &str) -> StdResult<BufferedReader<Box<Reader+'static>>, int> {
|
||||||
if "-" == path.as_slice() {
|
if "-" == path {
|
||||||
let reader = box stdin_raw() as Box<Reader>;
|
let reader = box stdin_raw() as Box<Reader>;
|
||||||
return Ok(BufferedReader::new(reader));
|
return Ok(BufferedReader::new(reader));
|
||||||
}
|
}
|
||||||
|
|
||||||
let fpath = Path::new(path.as_slice());
|
let fpath = Path::new(path);
|
||||||
if fpath.is_dir() {
|
if fpath.is_dir() {
|
||||||
show_info!("{}: is a directory", path);
|
show_info!("{}: is a directory", path);
|
||||||
}
|
}
|
||||||
|
@ -245,7 +242,7 @@ fn open(path: String) -> StdResult<BufferedReader<Box<Reader+'static>>, int> {
|
||||||
Ok(fd) => {
|
Ok(fd) => {
|
||||||
let reader = box fd as Box<Reader>;
|
let reader = box fd as Box<Reader>;
|
||||||
Ok(BufferedReader::new(reader))
|
Ok(BufferedReader::new(reader))
|
||||||
},
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
show_error!("wc: {}: {}", path, e);
|
show_error!("wc: {}: {}", path, e);
|
||||||
Err(1)
|
Err(1)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue