diff --git a/src/cat/Cargo.toml b/src/cat/Cargo.toml index dab41706e..9b54d06b5 100644 --- a/src/cat/Cargo.toml +++ b/src/cat/Cargo.toml @@ -8,7 +8,6 @@ name = "uu_cat" path = "cat.rs" [dependencies] -libc = "*" uucore = { path="../uucore" } [[bin]] diff --git a/src/cat/cat.rs b/src/cat/cat.rs index d303388c8..fb0c6cc16 100644 --- a/src/cat/cat.rs +++ b/src/cat/cat.rs @@ -1,35 +1,31 @@ #![crate_name = "uu_cat"] -/* - * This file is part of the uutils coreutils package. - * - * (c) Jordi Boggiano - * - * For the full copyright and license information, please view the LICENSE - * file that was distributed with this source code. - */ - -/* last synced with: cat (GNU coreutils) 8.13 */ - -extern crate libc; - +// This file is part of the uutils coreutils package. +// +// (c) Jordi Boggiano +// (c) Evgeniy Klyuchikov +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. +// #[macro_use] extern crate uucore; +// last synced with: cat (GNU coreutils) 8.13 use std::fs::File; -use std::intrinsics::{copy_nonoverlapping}; -use std::io::{stdout, stdin, stderr, Write, Read, Result}; +use std::io::{stdout, stdin, stderr, Write, Read, BufWriter}; use uucore::fs::is_stdin_interactive; -static SYNTAX: &'static str = "[OPTION]... [FILE]..."; +static SYNTAX: &'static str = "[OPTION]... [FILE]..."; static SUMMARY: &'static str = "Concatenate FILE(s), or standard input, to standard output - With no FILE, or when FILE is -, read standard input."; -static LONG_HELP: &'static str = ""; + With no FILE, or when FILE is -, read standard input."; +static LONG_HELP: &'static str = ""; pub fn uumain(args: Vec) -> i32 { let matches = new_coreopts!(SYNTAX, SUMMARY, LONG_HELP) .optflag("A", "show-all", "equivalent to -vET") - .optflag("b", "number-nonblank", + .optflag("b", + "number-nonblank", "number nonempty output lines, overrides -n") .optflag("e", "", "equivalent to -vE") .optflag("E", "show-ends", "display $ at end of each line") @@ -37,7 +33,8 @@ pub fn uumain(args: Vec) -> i32 { .optflag("s", "squeeze-blank", "suppress repeated empty output lines") .optflag("t", "", "equivalent to -vT") .optflag("T", "show-tabs", "display TAB characters as ^I") - .optflag("v", "show-nonprinting", + .optflag("v", + "show-nonprinting", "use ^ and M- notation, except for LF (\\n) and TAB (\\t)") .parse(args); @@ -48,198 +45,37 @@ pub fn uumain(args: Vec) -> i32 { } else { NumberingMode::NumberNone }; - let show_nonprint = matches.opts_present(&["A".to_owned(), "e".to_owned(), - "t".to_owned(), "v".to_owned()]); - let show_ends = matches.opts_present(&["E".to_owned(), "A".to_owned(), - "e".to_owned()]); - let show_tabs = matches.opts_present(&["A".to_owned(), "T".to_owned(), - "t".to_owned()]); + let show_nonprint = + matches.opts_present(&["A".to_owned(), "e".to_owned(), "t".to_owned(), "v".to_owned()]); + let show_ends = matches.opts_present(&["E".to_owned(), "A".to_owned(), "e".to_owned()]); + let show_tabs = matches.opts_present(&["A".to_owned(), "T".to_owned(), "t".to_owned()]); let squeeze_blank = matches.opt_present("s"); let mut files = matches.free; if files.is_empty() { files.push("-".to_owned()); } - exec(files, number_mode, show_nonprint, show_ends, show_tabs, squeeze_blank); - - 0 -} - -#[derive(Eq, PartialEq)] -enum NumberingMode { - NumberNone, - NumberNonEmpty, - NumberAll, -} - -fn write_lines(files: Vec, number: NumberingMode, squeeze_blank: bool, - show_ends: bool) { - - let mut line_counter: usize = 1; - - for (mut reader, interactive) in files.iter().filter_map(|p| open(&p[..])) { - - let mut in_buf = [0; 1024 * 31]; - let mut out_buf = [0; 1024 * 64]; - let mut writer = UnsafeWriter::new(&mut out_buf[..], stdout()); - let mut at_line_start = true; - let mut one_blank_kept = false; - while let Ok(n) = reader.read(&mut in_buf) { - if n == 0 { break } - - let in_buf = &in_buf[..n]; - let mut buf_pos = 0..n; - loop { - writer.possibly_flush(); - let pos = match buf_pos.next() { - Some(p) => p, - None => break, - }; - if in_buf[pos] == '\n' as u8 { - if !at_line_start || !squeeze_blank || !one_blank_kept { - one_blank_kept = true; - if at_line_start && number == NumberingMode::NumberAll { - (write!(&mut writer, "{0:6}\t", line_counter)).unwrap(); - line_counter += 1; - } - if show_ends { - writer.write_all(&['$' as u8]).unwrap(); - } - writer.write_all(&['\n' as u8]).unwrap(); - if interactive { - writer.flush().unwrap(); - } - } - at_line_start = true; - continue; - } else if one_blank_kept { - one_blank_kept = false; - } - if at_line_start && number != NumberingMode::NumberNone { - (write!(&mut writer, "{0:6}\t", line_counter)).unwrap(); - line_counter += 1; - } - match in_buf[pos..].iter().position(|c| *c == '\n' as u8) { - Some(p) => { - writer.write_all(&in_buf[pos..pos + p]).unwrap(); - if show_ends { - writer.write_all(&['$' as u8]).unwrap(); - } - writer.write_all(&['\n' as u8]).unwrap(); - if interactive { - writer.flush().unwrap(); - } - buf_pos = pos + p + 1..n; - at_line_start = true; - }, - None => { - writer.write_all(&in_buf[pos..]).unwrap(); - at_line_start = false; - break; - } - }; - } - } - } -} - -fn write_bytes(files: Vec, number: NumberingMode, squeeze_blank: bool, - show_ends: bool, show_nonprint: bool, show_tabs: bool) { - - let mut line_counter: usize = 1; - - for (mut reader, interactive) in files.iter().filter_map(|p| open(&p[..])) { - - // Flush all 1024 iterations. - let mut flush_counter = 0usize..1024; - - let mut in_buf = [0; 1024 * 32]; - let mut out_buf = [0; 1024 * 64]; - let mut writer = UnsafeWriter::new(&mut out_buf[..], stdout()); - let mut at_line_start = true; - while let Ok(n) = reader.read(&mut in_buf) { - if n == 0 { break } - - for &byte in in_buf[..n].iter() { - if flush_counter.next().is_none() { - writer.possibly_flush(); - flush_counter = 0usize..1024; - } - if byte == '\n' as u8 { - if !at_line_start || !squeeze_blank { - if at_line_start && number == NumberingMode::NumberAll { - (write!(&mut writer, "{0:6}\t", line_counter)).unwrap(); - line_counter += 1; - } - if show_ends { - writer.write_all(&['$' as u8]).unwrap(); - } - writer.write_all(&['\n' as u8]).unwrap(); - if interactive { - writer.flush().unwrap(); - } - } - at_line_start = true; - continue; - } - if at_line_start && number != NumberingMode::NumberNone { - (write!(&mut writer, "{0:6}\t", line_counter)).unwrap(); - line_counter += 1; - at_line_start = false; - } - // This code is slow because of the many branches. cat in glibc avoids - // this by having the whole loop inside show_nonprint. - if byte == '\t' as u8 { - if show_tabs { - writer.write_all("^I".as_bytes()) - } else { - writer.write_all(&[byte]) - } - } else if show_nonprint { - let byte = match byte { - 128 ... 255 => { - writer.write_all("M-".as_bytes()).unwrap(); - byte - 128 - }, - _ => byte, - }; - match byte { - 0 ... 31 => writer.write_all(&['^' as u8, byte + 64]), - 127 => writer.write_all(&['^' as u8, byte - 64]), - _ => writer.write_all(&[byte]), - } - } else { - writer.write_all(&[byte]) - }.unwrap(); - } - } - } -} - -fn write_fast(files: Vec) { - let mut writer = stdout(); - let mut in_buf = [0; 1024 * 64]; - - for (mut reader, _) in files.iter().filter_map(|p| open(&p[..])) { - while let Ok(n) = reader.read(&mut in_buf) { - if n == 0 { break } - // This interface is completely broken. - writer.write_all(&in_buf[..n]).unwrap(); - } - } -} - -fn exec(files: Vec, number: NumberingMode, show_nonprint: bool, - show_ends: bool, show_tabs: bool, squeeze_blank: bool) { - - if show_nonprint || show_tabs { - write_bytes(files, number, squeeze_blank, show_ends, show_nonprint, show_tabs); - } else if number != NumberingMode::NumberNone || squeeze_blank || show_ends { - write_lines(files, number, squeeze_blank, show_ends); + if show_tabs || show_nonprint || show_ends || squeeze_blank || + number_mode != NumberingMode::NumberNone { + write_lines(files, + number_mode, + squeeze_blank, + show_ends, + show_tabs, + show_nonprint); } else { write_fast(files); } pipe_flush!(); + + 0 +} + +#[derive(PartialEq)] +enum NumberingMode { + NumberNone, + NumberNonEmpty, + NumberAll, } fn open(path: &str) -> Option<(Box, bool)> { @@ -253,75 +89,157 @@ fn open(path: &str) -> Option<(Box, bool)> { Err(e) => { (writeln!(&mut stderr(), "cat: {0}: {1}", path, e.to_string())).unwrap(); None - }, - } -} - -struct UnsafeWriter<'a, W: Write> { - inner: W, - buf: &'a mut [u8], - pos: usize, - threshold: usize, -} - -impl<'a, W: Write> UnsafeWriter<'a, W> { - fn new(buf: &'a mut [u8], inner: W) -> UnsafeWriter<'a, W> { - let threshold = buf.len()/2; - UnsafeWriter { - inner: inner, - buf: buf, - pos: 0, - threshold: threshold, } } +} - fn flush_buf(&mut self) -> Result<()> { - if self.pos != 0 { - let ret = self.inner.write(&self.buf[..self.pos]); - self.pos = 0; - match ret { - Ok(_) => Ok(()), - Err(e) => Err(e), +fn write_fast(files: Vec) { + let mut writer = stdout(); + let mut in_buf = [0; 1024 * 64]; + + for (mut reader, _) in files.iter().filter_map(|p| open(&p[..])) { + while let Ok(n) = reader.read(&mut in_buf) { + if n == 0 { + break; } - } else { - Ok(()) - } - } - - fn possibly_flush(&mut self) { - if self.pos > self.threshold { - self.inner.write_all(&self.buf[..self.pos]).unwrap(); - self.pos = 0; + writer.write_all(&in_buf[..n]).unwrap(); } } } -#[inline(never)] -fn fail() -> ! { - panic!("assertion failed"); -} +fn write_lines(files: Vec, + number: NumberingMode, + squeeze_blank: bool, + show_ends: bool, + show_tabs: bool, + show_nonprint: bool) { + // initialize end of line + let end_of_line = if show_ends { + "$\n".as_bytes() + } else { + "\n".as_bytes() + }; + // initialize tab simbol + let tab = if show_tabs { + "^I".as_bytes() + } else { + "\t".as_bytes() + }; + let mut line_counter: usize = 1; -impl<'a, W: Write> Write for UnsafeWriter<'a, W> { - fn write(&mut self, buf: &[u8]) -> Result { - let dst = &mut self.buf[self.pos..]; - let len = buf.len(); - if len > dst.len() { - fail(); + for (mut reader, interactive) in files.iter().filter_map(|p| open(&p[..])) { + let mut in_buf = [0; 1024 * 31]; + let mut writer = BufWriter::with_capacity(1024 * 64, stdout()); + let mut at_line_start = true; + let mut one_blank_kept = false; + while let Ok(n) = reader.read(&mut in_buf) { + if n == 0 { + break; + } + let in_buf = &in_buf[..n]; + let mut pos = 0; + while pos < n { + // skip empty lines enumerating them if needed + if in_buf[pos] == '\n' as u8 { + if !at_line_start || !squeeze_blank || !one_blank_kept { + one_blank_kept = true; + if at_line_start && number == NumberingMode::NumberAll { + (write!(&mut writer, "{0:6}\t", line_counter)).unwrap(); + line_counter += 1; + } + writer.write_all(end_of_line).unwrap(); + if interactive { + writer.flush().unwrap(); + } + } + at_line_start = true; + pos += 1; + continue; + } + one_blank_kept = false; + if at_line_start && number != NumberingMode::NumberNone { + (write!(&mut writer, "{0:6}\t", line_counter)).unwrap(); + line_counter += 1; + } + + // print to end of line or end of buffer + let offset = if show_nonprint { + write_nonprint_to_end(&in_buf[pos..], &mut writer, tab) + } else if show_tabs { + write_tab_to_end(&in_buf[pos..], &mut writer) + } else { + write_to_end(&in_buf[pos..], &mut writer) + }; + // end of buffer? + if offset == 0 { + at_line_start = false; + break; + } + // print suitable end of line + writer.write_all(end_of_line).unwrap(); + if interactive { + writer.flush().unwrap(); + } + at_line_start = true; + pos += offset; + } } - unsafe { - copy_nonoverlapping(buf.as_ptr(), dst.as_mut_ptr(), len) + } +} + +// write***_to_end methods +// Write all symbols till end of line or end of buffer is reached +// Return the (number of written symbols + 1) or 0 if the end of buffer is reached +fn write_to_end(in_buf: &[u8], writer: &mut W) -> usize { + match in_buf.iter().position(|c| *c == '\n' as u8) { + Some(p) => { + writer.write_all(&in_buf[..p]).unwrap(); + p + 1 + } + None => { + writer.write_all(in_buf).unwrap(); + 0 } - self.pos += len; - Ok(len) - } - - fn flush(&mut self) -> Result<()> { - self.flush_buf().and_then(|()| self.inner.flush()) } } -impl<'a, W: Write> Drop for UnsafeWriter<'a, W> { - fn drop(&mut self) { - let _ = self.flush_buf(); +fn write_tab_to_end(mut in_buf: &[u8], writer: &mut W) -> usize { + loop { + match in_buf.iter().position(|c| *c == '\n' as u8 || *c == '\t' as u8) { + Some(p) => { + writer.write_all(&in_buf[..p]).unwrap(); + if in_buf[p] == '\n' as u8 { + return p + 1; + } else { + writer.write_all("^I".as_bytes()).unwrap(); + in_buf = &in_buf[p + 1..]; + } + } + None => { + writer.write_all(in_buf).unwrap(); + return 0; + } + }; } } + +fn write_nonprint_to_end(in_buf: &[u8], writer: &mut W, tab: &[u8]) -> usize { + let mut count = 0; + + for byte in in_buf.iter().map(|c| *c) { + if byte == '\n' as u8 { + break; + } + match byte { + 9 => writer.write_all(tab), + 0...8 | 10...31 => writer.write_all(&['^' as u8, byte + 64]), + 32...126 => writer.write_all(&[byte]), + 127 => writer.write_all(&['^' as u8, byte - 64]), + 128...159 => writer.write_all(&['M' as u8, '-' as u8, '^' as u8, byte - 64]), + 160...254 => writer.write_all(&['M' as u8, '-' as u8, byte - 128]), + _ => writer.write_all(&['M' as u8, '-' as u8, '^' as u8, 63]), + }.unwrap(); + count += 1; + } + if count != in_buf.len() { count + 1 } else { 0 } +}