From dd16c58ab4549a9a15c4a74705834bd5160712fd Mon Sep 17 00:00:00 2001 From: evgeniy Date: Sun, 16 Oct 2016 12:43:23 +0200 Subject: [PATCH 1/7] cat utility refactoring: - Less if branches - Unnecessary range variable deleted - Optional fail-assertion commented --- src/cat/cat.rs | 196 +++++++++++++++++++++++++++---------------------- 1 file changed, 110 insertions(+), 86 deletions(-) diff --git a/src/cat/cat.rs b/src/cat/cat.rs index d303388c8..c1a81bf05 100644 --- a/src/cat/cat.rs +++ b/src/cat/cat.rs @@ -1,17 +1,14 @@ #![crate_name = "uu_cat"] -/* - * This file is part of the uutils coreutils package. - * - * (c) Jordi Boggiano - * - * For the full copyright and license information, please view the LICENSE - * file that was distributed with this source code. - */ +// This file is part of the uutils coreutils package. +// +// (c) Jordi Boggiano +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. +// -/* last synced with: cat (GNU coreutils) 8.13 */ - -extern crate libc; +// last synced with: cat (GNU coreutils) 8.13 #[macro_use] extern crate uucore; @@ -21,15 +18,16 @@ use std::intrinsics::{copy_nonoverlapping}; use std::io::{stdout, stdin, stderr, Write, Read, Result}; use uucore::fs::is_stdin_interactive; -static SYNTAX: &'static str = "[OPTION]... [FILE]..."; +static SYNTAX: &'static str = "[OPTION]... 
[FILE]..."; static SUMMARY: &'static str = "Concatenate FILE(s), or standard input, to standard output - With no FILE, or when FILE is -, read standard input."; -static LONG_HELP: &'static str = ""; + With no FILE, or when FILE is -, read standard input."; +static LONG_HELP: &'static str = ""; pub fn uumain(args: Vec) -> i32 { let matches = new_coreopts!(SYNTAX, SUMMARY, LONG_HELP) .optflag("A", "show-all", "equivalent to -vET") - .optflag("b", "number-nonblank", + .optflag("b", + "number-nonblank", "number nonempty output lines, overrides -n") .optflag("e", "", "equivalent to -vE") .optflag("E", "show-ends", "display $ at end of each line") @@ -37,7 +35,8 @@ pub fn uumain(args: Vec) -> i32 { .optflag("s", "squeeze-blank", "suppress repeated empty output lines") .optflag("t", "", "equivalent to -vT") .optflag("T", "show-tabs", "display TAB characters as ^I") - .optflag("v", "show-nonprinting", + .optflag("v", + "show-nonprinting", "use ^ and M- notation, except for LF (\\n) and TAB (\\t)") .parse(args); @@ -48,19 +47,22 @@ pub fn uumain(args: Vec) -> i32 { } else { NumberingMode::NumberNone }; - let show_nonprint = matches.opts_present(&["A".to_owned(), "e".to_owned(), - "t".to_owned(), "v".to_owned()]); - let show_ends = matches.opts_present(&["E".to_owned(), "A".to_owned(), - "e".to_owned()]); - let show_tabs = matches.opts_present(&["A".to_owned(), "T".to_owned(), - "t".to_owned()]); + let show_nonprint = + matches.opts_present(&["A".to_owned(), "e".to_owned(), "t".to_owned(), "v".to_owned()]); + let show_ends = matches.opts_present(&["E".to_owned(), "A".to_owned(), "e".to_owned()]); + let show_tabs = matches.opts_present(&["A".to_owned(), "T".to_owned(), "t".to_owned()]); let squeeze_blank = matches.opt_present("s"); let mut files = matches.free; if files.is_empty() { files.push("-".to_owned()); } - exec(files, number_mode, show_nonprint, show_ends, show_tabs, squeeze_blank); + exec(files, + number_mode, + show_nonprint, + show_ends, + show_tabs, + 
squeeze_blank); 0 } @@ -72,29 +74,32 @@ enum NumberingMode { NumberAll, } -fn write_lines(files: Vec, number: NumberingMode, squeeze_blank: bool, - show_ends: bool) { +fn write_lines(files: Vec, number: NumberingMode, squeeze_blank: bool, show_ends: bool) { + // initialize end of line + let end_of_line = if show_ends { + "$\n".as_bytes() + } else { + "\n".as_bytes() + }; let mut line_counter: usize = 1; for (mut reader, interactive) in files.iter().filter_map(|p| open(&p[..])) { - let mut in_buf = [0; 1024 * 31]; + let mut in_buf = [0; 1024 * 31]; let mut out_buf = [0; 1024 * 64]; let mut writer = UnsafeWriter::new(&mut out_buf[..], stdout()); let mut at_line_start = true; let mut one_blank_kept = false; while let Ok(n) = reader.read(&mut in_buf) { - if n == 0 { break } + if n == 0 { + break; + } let in_buf = &in_buf[..n]; - let mut buf_pos = 0..n; - loop { + let mut pos = 0; + while pos < n { writer.possibly_flush(); - let pos = match buf_pos.next() { - Some(p) => p, - None => break, - }; if in_buf[pos] == '\n' as u8 { if !at_line_start || !squeeze_blank || !one_blank_kept { one_blank_kept = true; @@ -102,15 +107,13 @@ fn write_lines(files: Vec, number: NumberingMode, squeeze_blank: bool, (write!(&mut writer, "{0:6}\t", line_counter)).unwrap(); line_counter += 1; } - if show_ends { - writer.write_all(&['$' as u8]).unwrap(); - } - writer.write_all(&['\n' as u8]).unwrap(); + writer.write_all(end_of_line).unwrap(); if interactive { writer.flush().unwrap(); } } at_line_start = true; + pos += 1; continue; } else if one_blank_kept { one_blank_kept = false; @@ -122,16 +125,13 @@ fn write_lines(files: Vec, number: NumberingMode, squeeze_blank: bool, match in_buf[pos..].iter().position(|c| *c == '\n' as u8) { Some(p) => { writer.write_all(&in_buf[pos..pos + p]).unwrap(); - if show_ends { - writer.write_all(&['$' as u8]).unwrap(); - } - writer.write_all(&['\n' as u8]).unwrap(); + writer.write_all(end_of_line).unwrap(); if interactive { writer.flush().unwrap(); } - buf_pos = 
pos + p + 1..n; + pos += p + 1; at_line_start = true; - }, + } None => { writer.write_all(&in_buf[pos..]).unwrap(); at_line_start = false; @@ -143,8 +143,24 @@ fn write_lines(files: Vec, number: NumberingMode, squeeze_blank: bool, } } -fn write_bytes(files: Vec, number: NumberingMode, squeeze_blank: bool, - show_ends: bool, show_nonprint: bool, show_tabs: bool) { +fn write_bytes(files: Vec, + number: NumberingMode, + squeeze_blank: bool, + show_ends: bool, + show_tabs: bool, + show_nonprint: bool) { + // initialize end of line + let end_of_line = if show_ends { + "$\n".as_bytes() + } else { + "\n".as_bytes() + }; + // initialize tab simbol + let tab = if show_tabs { + "^I".as_bytes() + } else { + "\t".as_bytes() + }; let mut line_counter: usize = 1; @@ -153,12 +169,14 @@ fn write_bytes(files: Vec, number: NumberingMode, squeeze_blank: bool, // Flush all 1024 iterations. let mut flush_counter = 0usize..1024; - let mut in_buf = [0; 1024 * 32]; + let mut in_buf = [0; 1024 * 32]; let mut out_buf = [0; 1024 * 64]; let mut writer = UnsafeWriter::new(&mut out_buf[..], stdout()); let mut at_line_start = true; while let Ok(n) = reader.read(&mut in_buf) { - if n == 0 { break } + if n == 0 { + break; + } for &byte in in_buf[..n].iter() { if flush_counter.next().is_none() { @@ -171,10 +189,7 @@ fn write_bytes(files: Vec, number: NumberingMode, squeeze_blank: bool, (write!(&mut writer, "{0:6}\t", line_counter)).unwrap(); line_counter += 1; } - if show_ends { - writer.write_all(&['$' as u8]).unwrap(); - } - writer.write_all(&['\n' as u8]).unwrap(); + writer.write_all(end_of_line).unwrap(); if interactive { writer.flush().unwrap(); } @@ -187,30 +202,28 @@ fn write_bytes(files: Vec, number: NumberingMode, squeeze_blank: bool, line_counter += 1; at_line_start = false; } + // This code is slow because of the many branches. cat in glibc avoids // this by having the whole loop inside show_nonprint. 
if byte == '\t' as u8 { - if show_tabs { - writer.write_all("^I".as_bytes()) + writer.write_all(tab) + } else if show_nonprint { + let byte = match byte { + 128...255 => { + writer.write_all("M-".as_bytes()).unwrap(); + byte - 128 + } + _ => byte, + }; + match byte { + 0...31 => writer.write_all(&['^' as u8, byte + 64]), + 127 => writer.write_all(&['^' as u8, byte - 64]), + _ => writer.write_all(&[byte]), + } } else { writer.write_all(&[byte]) } - } else if show_nonprint { - let byte = match byte { - 128 ... 255 => { - writer.write_all("M-".as_bytes()).unwrap(); - byte - 128 - }, - _ => byte, - }; - match byte { - 0 ... 31 => writer.write_all(&['^' as u8, byte + 64]), - 127 => writer.write_all(&['^' as u8, byte - 64]), - _ => writer.write_all(&[byte]), - } - } else { - writer.write_all(&[byte]) - }.unwrap(); + .unwrap(); } } } @@ -222,18 +235,29 @@ fn write_fast(files: Vec) { for (mut reader, _) in files.iter().filter_map(|p| open(&p[..])) { while let Ok(n) = reader.read(&mut in_buf) { - if n == 0 { break } + if n == 0 { + break; + } // This interface is completely broken. 
writer.write_all(&in_buf[..n]).unwrap(); } } } -fn exec(files: Vec, number: NumberingMode, show_nonprint: bool, - show_ends: bool, show_tabs: bool, squeeze_blank: bool) { +fn exec(files: Vec, + number: NumberingMode, + show_nonprint: bool, + show_ends: bool, + show_tabs: bool, + squeeze_blank: bool) { if show_nonprint || show_tabs { - write_bytes(files, number, squeeze_blank, show_ends, show_nonprint, show_tabs); + write_bytes(files, + number, + squeeze_blank, + show_ends, + show_tabs, + show_nonprint); } else if number != NumberingMode::NumberNone || squeeze_blank || show_ends { write_lines(files, number, squeeze_blank, show_ends); } else { @@ -253,7 +277,7 @@ fn open(path: &str) -> Option<(Box, bool)> { Err(e) => { (writeln!(&mut stderr(), "cat: {0}: {1}", path, e.to_string())).unwrap(); None - }, + } } } @@ -266,7 +290,7 @@ struct UnsafeWriter<'a, W: Write> { impl<'a, W: Write> UnsafeWriter<'a, W> { fn new(buf: &'a mut [u8], inner: W) -> UnsafeWriter<'a, W> { - let threshold = buf.len()/2; + let threshold = buf.len() / 2; UnsafeWriter { inner: inner, buf: buf, @@ -296,21 +320,21 @@ impl<'a, W: Write> UnsafeWriter<'a, W> { } } -#[inline(never)] -fn fail() -> ! { - panic!("assertion failed"); -} +//#[inline(never)] +//fn fail() -> ! 
{ +// panic!("assertion failed"); +//} impl<'a, W: Write> Write for UnsafeWriter<'a, W> { fn write(&mut self, buf: &[u8]) -> Result { - let dst = &mut self.buf[self.pos..]; + //let dst = &mut self.buf[self.pos..]; let len = buf.len(); - if len > dst.len() { - fail(); - } - unsafe { - copy_nonoverlapping(buf.as_ptr(), dst.as_mut_ptr(), len) - } + // assertion is true for current code + // and it is a bottlneck place + // if len > dst.len() { + // fail(); + // } + unsafe { copy_nonoverlapping(buf.as_ptr(), self.buf[self.pos..].as_mut_ptr(), len) } self.pos += len; Ok(len) } From 2060048c796e689614b696a8c1537951f37f48c9 Mon Sep 17 00:00:00 2001 From: kevgeniy Date: Sun, 16 Oct 2016 17:13:02 +0200 Subject: [PATCH 2/7] Fixed and made the comments clearer --- src/cat/cat.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/cat/cat.rs b/src/cat/cat.rs index c1a81bf05..90db91ee8 100644 --- a/src/cat/cat.rs +++ b/src/cat/cat.rs @@ -320,6 +320,9 @@ impl<'a, W: Write> UnsafeWriter<'a, W> { } } +// This method was used in Write::write only. It must not be called in the correct cat code +// and is placed in the bottleneck place, so it is not used anymore. +// Uncomment this and code in Write::write for more convenient testing of a new cat version. //#[inline(never)] //fn fail() -> ! 
{ // panic!("assertion failed"); @@ -329,8 +332,8 @@ impl<'a, W: Write> Write for UnsafeWriter<'a, W> { fn write(&mut self, buf: &[u8]) -> Result { //let dst = &mut self.buf[self.pos..]; let len = buf.len(); - // assertion is true for current code - // and it is a bottlneck place + // the condition is false for current code and every correct code + // see fail method comment for more information // if len > dst.len() { // fail(); // } From 4a2b8e3c5224b70c7c975716d612056f6fd7523e Mon Sep 17 00:00:00 2001 From: evgeniy Date: Tue, 18 Oct 2016 19:09:36 +0200 Subject: [PATCH 3/7] cat: conditional compilation added --- src/cat/cat.rs | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/src/cat/cat.rs b/src/cat/cat.rs index 90db91ee8..d1e003576 100644 --- a/src/cat/cat.rs +++ b/src/cat/cat.rs @@ -321,22 +321,32 @@ impl<'a, W: Write> UnsafeWriter<'a, W> { } // This method was used in Write::write only. It must not be called in the correct cat code -// and is placed in the bottleneck place, so it is not used anymore. -// Uncomment this and code in Write::write for more convenient testing of a new cat version. -//#[inline(never)] -//fn fail() -> ! { -// panic!("assertion failed"); -//} +// and is placed in the bottleneck place, so it is not used anymore in release build. +// In debug buld it is used to support bug finding +#[cfg(debug_assertions)] +#[inline(never)] +fn fail() -> ! 
{ + panic!("assertion failed"); +} impl<'a, W: Write> Write for UnsafeWriter<'a, W> { + #[cfg(debug_assertions)] + fn write(&mut self, buf: &[u8]) -> Result { + let dst = &mut self.buf[self.pos..]; + let len = buf.len(); + if len > dst.len() { + fail(); + } + unsafe { copy_nonoverlapping(buf.as_ptr(), dst.as_mut_ptr(), len) } + self.pos += len; + Ok(len) + } + + // a condition check excluded because it is false for current and every correct code + // see fail method comment for more information + #[cfg(not(debug_assertions))] fn write(&mut self, buf: &[u8]) -> Result { - //let dst = &mut self.buf[self.pos..]; let len = buf.len(); - // the condition is false for current code and every correct code - // see fail method comment for more information - // if len > dst.len() { - // fail(); - // } unsafe { copy_nonoverlapping(buf.as_ptr(), self.buf[self.pos..].as_mut_ptr(), len) } self.pos += len; Ok(len) From 72490ef9568442ca3561e5ac8eb9dd0a603a3210 Mon Sep 17 00:00:00 2001 From: kevgeniy Date: Mon, 24 Oct 2016 04:35:37 +0300 Subject: [PATCH 4/7] cat: rewrote most part of the code Rewrote cat to eliminate code duplication and make it safe - UnsafeWriter is replaced by BufWriter - write_lines (any option except -T and -v) and write_bytes (-T and -v options) are replaced by a single write_lines method. The new method uses ``write_to_end``, ``write_tab_to_end`` or ``write_nonprint_to_end`` to write all symbols until the end of the line in the right way. 
- Benchmarking (-n, -T and -v options respectively): | old (ns/iter) | new (ns/iter) | | -------------------------- | -------------------------- | | 6,501,496 (+/- 1,173,481) | 6,683,158 (+/- 373,539) | | 8,634,023 (+/- 547,595) | 5,408,676 (+/- 715,458) | | 24,056,507 (+/- 1,177,445) | 30,879,788 (+/- 1,180,598) | --- src/cat/cat.rs | 424 ++++++++++++++++++------------------------------- 1 file changed, 152 insertions(+), 272 deletions(-) diff --git a/src/cat/cat.rs b/src/cat/cat.rs index d1e003576..1058b434d 100644 --- a/src/cat/cat.rs +++ b/src/cat/cat.rs @@ -3,19 +3,17 @@ // This file is part of the uutils coreutils package. // // (c) Jordi Boggiano +// (c) Evgeniy Klyuchikov // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. // - -// last synced with: cat (GNU coreutils) 8.13 - #[macro_use] extern crate uucore; +// last synced with: cat (GNU coreutils) 8.13 use std::fs::File; -use std::intrinsics::{copy_nonoverlapping}; -use std::io::{stdout, stdin, stderr, Write, Read, Result}; +use std::io::{stdout, stdin, stderr, Write, Read, BufWriter}; use uucore::fs::is_stdin_interactive; static SYNTAX: &'static str = "[OPTION]... 
[FILE]..."; @@ -57,213 +55,27 @@ pub fn uumain(args: Vec) -> i32 { files.push("-".to_owned()); } - exec(files, - number_mode, - show_nonprint, - show_ends, - show_tabs, - squeeze_blank); - - 0 -} - -#[derive(Eq, PartialEq)] -enum NumberingMode { - NumberNone, - NumberNonEmpty, - NumberAll, -} - -fn write_lines(files: Vec, number: NumberingMode, squeeze_blank: bool, show_ends: bool) { - // initialize end of line - let end_of_line = if show_ends { - "$\n".as_bytes() - } else { - "\n".as_bytes() - }; - - let mut line_counter: usize = 1; - - for (mut reader, interactive) in files.iter().filter_map(|p| open(&p[..])) { - - let mut in_buf = [0; 1024 * 31]; - let mut out_buf = [0; 1024 * 64]; - let mut writer = UnsafeWriter::new(&mut out_buf[..], stdout()); - let mut at_line_start = true; - let mut one_blank_kept = false; - while let Ok(n) = reader.read(&mut in_buf) { - if n == 0 { - break; - } - - let in_buf = &in_buf[..n]; - let mut pos = 0; - while pos < n { - writer.possibly_flush(); - if in_buf[pos] == '\n' as u8 { - if !at_line_start || !squeeze_blank || !one_blank_kept { - one_blank_kept = true; - if at_line_start && number == NumberingMode::NumberAll { - (write!(&mut writer, "{0:6}\t", line_counter)).unwrap(); - line_counter += 1; - } - writer.write_all(end_of_line).unwrap(); - if interactive { - writer.flush().unwrap(); - } - } - at_line_start = true; - pos += 1; - continue; - } else if one_blank_kept { - one_blank_kept = false; - } - if at_line_start && number != NumberingMode::NumberNone { - (write!(&mut writer, "{0:6}\t", line_counter)).unwrap(); - line_counter += 1; - } - match in_buf[pos..].iter().position(|c| *c == '\n' as u8) { - Some(p) => { - writer.write_all(&in_buf[pos..pos + p]).unwrap(); - writer.write_all(end_of_line).unwrap(); - if interactive { - writer.flush().unwrap(); - } - pos += p + 1; - at_line_start = true; - } - None => { - writer.write_all(&in_buf[pos..]).unwrap(); - at_line_start = false; - break; - } - }; - } - } - } -} - -fn 
write_bytes(files: Vec, - number: NumberingMode, - squeeze_blank: bool, - show_ends: bool, - show_tabs: bool, - show_nonprint: bool) { - // initialize end of line - let end_of_line = if show_ends { - "$\n".as_bytes() - } else { - "\n".as_bytes() - }; - // initialize tab simbol - let tab = if show_tabs { - "^I".as_bytes() - } else { - "\t".as_bytes() - }; - - let mut line_counter: usize = 1; - - for (mut reader, interactive) in files.iter().filter_map(|p| open(&p[..])) { - - // Flush all 1024 iterations. - let mut flush_counter = 0usize..1024; - - let mut in_buf = [0; 1024 * 32]; - let mut out_buf = [0; 1024 * 64]; - let mut writer = UnsafeWriter::new(&mut out_buf[..], stdout()); - let mut at_line_start = true; - while let Ok(n) = reader.read(&mut in_buf) { - if n == 0 { - break; - } - - for &byte in in_buf[..n].iter() { - if flush_counter.next().is_none() { - writer.possibly_flush(); - flush_counter = 0usize..1024; - } - if byte == '\n' as u8 { - if !at_line_start || !squeeze_blank { - if at_line_start && number == NumberingMode::NumberAll { - (write!(&mut writer, "{0:6}\t", line_counter)).unwrap(); - line_counter += 1; - } - writer.write_all(end_of_line).unwrap(); - if interactive { - writer.flush().unwrap(); - } - } - at_line_start = true; - continue; - } - if at_line_start && number != NumberingMode::NumberNone { - (write!(&mut writer, "{0:6}\t", line_counter)).unwrap(); - line_counter += 1; - at_line_start = false; - } - - // This code is slow because of the many branches. cat in glibc avoids - // this by having the whole loop inside show_nonprint. 
- if byte == '\t' as u8 { - writer.write_all(tab) - } else if show_nonprint { - let byte = match byte { - 128...255 => { - writer.write_all("M-".as_bytes()).unwrap(); - byte - 128 - } - _ => byte, - }; - match byte { - 0...31 => writer.write_all(&['^' as u8, byte + 64]), - 127 => writer.write_all(&['^' as u8, byte - 64]), - _ => writer.write_all(&[byte]), - } - } else { - writer.write_all(&[byte]) - } - .unwrap(); - } - } - } -} - -fn write_fast(files: Vec) { - let mut writer = stdout(); - let mut in_buf = [0; 1024 * 64]; - - for (mut reader, _) in files.iter().filter_map(|p| open(&p[..])) { - while let Ok(n) = reader.read(&mut in_buf) { - if n == 0 { - break; - } - // This interface is completely broken. - writer.write_all(&in_buf[..n]).unwrap(); - } - } -} - -fn exec(files: Vec, - number: NumberingMode, - show_nonprint: bool, - show_ends: bool, - show_tabs: bool, - squeeze_blank: bool) { - - if show_nonprint || show_tabs { - write_bytes(files, - number, + if show_tabs || show_nonprint || show_ends || squeeze_blank || + number_mode != NumberingMode::NumberNone { + write_lines(files, + number_mode, squeeze_blank, show_ends, show_tabs, show_nonprint); - } else if number != NumberingMode::NumberNone || squeeze_blank || show_ends { - write_lines(files, number, squeeze_blank, show_ends); } else { write_fast(files); } pipe_flush!(); + + 0 +} + +#[derive(PartialEq)] +enum NumberingMode { + NumberNone, + NumberNonEmpty, + NumberAll, } fn open(path: &str) -> Option<(Box, bool)> { @@ -281,84 +93,152 @@ fn open(path: &str) -> Option<(Box, bool)> { } } -struct UnsafeWriter<'a, W: Write> { - inner: W, - buf: &'a mut [u8], - pos: usize, - threshold: usize, -} +fn write_fast(files: Vec) { + let mut writer = stdout(); + let mut in_buf = [0; 1024 * 64]; -impl<'a, W: Write> UnsafeWriter<'a, W> { - fn new(buf: &'a mut [u8], inner: W) -> UnsafeWriter<'a, W> { - let threshold = buf.len() / 2; - UnsafeWriter { - inner: inner, - buf: buf, - pos: 0, - threshold: threshold, - } - } - - fn 
flush_buf(&mut self) -> Result<()> { - if self.pos != 0 { - let ret = self.inner.write(&self.buf[..self.pos]); - self.pos = 0; - match ret { - Ok(_) => Ok(()), - Err(e) => Err(e), + for (mut reader, _) in files.iter().filter_map(|p| open(&p[..])) { + while let Ok(n) = reader.read(&mut in_buf) { + if n == 0 { + break; } - } else { - Ok(()) - } - } - - fn possibly_flush(&mut self) { - if self.pos > self.threshold { - self.inner.write_all(&self.buf[..self.pos]).unwrap(); - self.pos = 0; + writer.write_all(&in_buf[..n]).unwrap(); } } } -// This method was used in Write::write only. It must not be called in the correct cat code -// and is placed in the bottleneck place, so it is not used anymore in release build. -// In debug buld it is used to support bug finding -#[cfg(debug_assertions)] -#[inline(never)] -fn fail() -> ! { - panic!("assertion failed"); -} +fn write_lines(files: Vec, + number: NumberingMode, + squeeze_blank: bool, + show_ends: bool, + show_tabs: bool, + show_nonprint: bool) { + // initialize end of line + let end_of_line = if show_ends { + "$\n".as_bytes() + } else { + "\n".as_bytes() + }; + // initialize tab simbol + let tab = if show_tabs { + "^I".as_bytes() + } else { + "\t".as_bytes() + }; + let mut line_counter: usize = 1; -impl<'a, W: Write> Write for UnsafeWriter<'a, W> { - #[cfg(debug_assertions)] - fn write(&mut self, buf: &[u8]) -> Result { - let dst = &mut self.buf[self.pos..]; - let len = buf.len(); - if len > dst.len() { - fail(); + for (mut reader, interactive) in files.iter().filter_map(|p| open(&p[..])) { + let mut in_buf = [0; 1024 * 31]; + let mut writer = BufWriter::with_capacity(1024 * 64, stdout()); + let mut at_line_start = true; + let mut one_blank_kept = false; + while let Ok(n) = reader.read(&mut in_buf) { + if n == 0 { + break; + } + let in_buf = &in_buf[..n]; + let mut pos = 0; + while pos < n { + // skip empty lines enumerating them if needed + if in_buf[pos] == '\n' as u8 { + if !at_line_start || !squeeze_blank || 
!one_blank_kept { + one_blank_kept = true; + if at_line_start && number == NumberingMode::NumberAll { + (write!(&mut writer, "{0:6}\t", line_counter)).unwrap(); + line_counter += 1; + } + writer.write_all(end_of_line).unwrap(); + if interactive { + writer.flush().unwrap(); + } + } + at_line_start = true; + pos += 1; + continue; + } + one_blank_kept = false; + if at_line_start && number != NumberingMode::NumberNone { + (write!(&mut writer, "{0:6}\t", line_counter)).unwrap(); + line_counter += 1; + } + + // print to end of line or end of buffer + let offset = if show_nonprint { + write_nonprint_to_end(&in_buf[pos..], &mut writer, tab) + } else if show_tabs { + write_tab_to_end(&in_buf[pos..], &mut writer) + } else { + write_to_end(&in_buf[pos..], &mut writer) + }; + // end of buffer? + if offset == 0 { + at_line_start = false; + break; + } + // print suitable end of line + writer.write_all(end_of_line).unwrap(); + if interactive { + writer.flush().unwrap(); + } + at_line_start = true; + pos += offset; + } } - unsafe { copy_nonoverlapping(buf.as_ptr(), dst.as_mut_ptr(), len) } - self.pos += len; - Ok(len) - } - - // a condition check excluded because it is false for current and every correct code - // see fail method comment for more information - #[cfg(not(debug_assertions))] - fn write(&mut self, buf: &[u8]) -> Result { - let len = buf.len(); - unsafe { copy_nonoverlapping(buf.as_ptr(), self.buf[self.pos..].as_mut_ptr(), len) } - self.pos += len; - Ok(len) - } - - fn flush(&mut self) -> Result<()> { - self.flush_buf().and_then(|()| self.inner.flush()) } } -impl<'a, W: Write> Drop for UnsafeWriter<'a, W> { - fn drop(&mut self) { - let _ = self.flush_buf(); +// write***_to_end methods +// Write all simbols till end of line or end of buffer +// Return the number of written bytes - 1 or 0 if the end of buffer is reached +fn write_to_end(in_buf: &[u8], writer: &mut W) -> usize { + match in_buf.iter().position(|c| *c == '\n' as u8) { + Some(p) => { + 
writer.write_all(&in_buf[..p]).unwrap(); + p + 1 + } + None => { + writer.write_all(in_buf).unwrap(); + 0 + } } } + +fn write_tab_to_end(in_buf: &[u8], writer: &mut W) -> usize { + match in_buf.iter().position(|c| *c == '\n' as u8 || *c == '\t' as u8) { + Some(p) => { + writer.write_all(&in_buf[..p]).unwrap(); + if in_buf[p] == '\n' as u8 { + p + 1 + } else { + writer.write_all("^I".as_bytes()).unwrap(); + write_tab_to_end(&in_buf[p + 1..], writer) + } + } + None => { + writer.write_all(in_buf).unwrap(); + 0 + } + } +} + +fn write_nonprint_to_end(in_buf: &[u8], writer: &mut W, tab: &[u8]) -> usize { + let mut count = 0; + + for byte in in_buf.iter().map(|c| *c) { + if byte == '\n' as u8 { + break; + } + match byte { + 9 => writer.write_all(tab), + 0...8 | 10...31 => writer.write_all(&['^' as u8, byte + 64]), + 32...126 => writer.write_all(&[byte]), + 127 => writer.write_all(&['^' as u8, byte - 64]), + 128...159 => writer.write_all(&['M' as u8, '-' as u8, '^' as u8, byte - 64]), + 160...254 => writer.write_all(&['M' as u8, '-' as u8, byte - 128]), + _ => writer.write_all(&['M' as u8, '-' as u8, '^' as u8, 63]), + } + .unwrap(); + count += 1; + } + if count != in_buf.len() { count + 1 } else { 0 } +} \ No newline at end of file From 6228b06e29a6e12c947fb7904d64fbdcb5ab7b19 Mon Sep 17 00:00:00 2001 From: kevgeniy Date: Mon, 24 Oct 2016 05:23:54 +0300 Subject: [PATCH 5/7] cat: remove libc dependency --- src/cat/Cargo.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/src/cat/Cargo.toml b/src/cat/Cargo.toml index dab41706e..9b54d06b5 100644 --- a/src/cat/Cargo.toml +++ b/src/cat/Cargo.toml @@ -8,7 +8,6 @@ name = "uu_cat" path = "cat.rs" [dependencies] -libc = "*" uucore = { path="../uucore" } [[bin]] From 0d56009c3a1315b0c3f7d3b650d3b831d89e34e8 Mon Sep 17 00:00:00 2001 From: kevgeniy Date: Mon, 24 Oct 2016 20:18:44 +0300 Subject: [PATCH 6/7] cat: remove recursion --- src/cat/cat.rs | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 
deletions(-) diff --git a/src/cat/cat.rs b/src/cat/cat.rs index 1058b434d..cbcf774ba 100644 --- a/src/cat/cat.rs +++ b/src/cat/cat.rs @@ -188,8 +188,8 @@ fn write_lines(files: Vec, } // write***_to_end methods -// Write all simbols till end of line or end of buffer -// Return the number of written bytes - 1 or 0 if the end of buffer is reached +// Write all symbols till end of line or end of buffer is reached +// Return the (number of written symbols + 1) or 0 if the end of buffer is reached fn write_to_end(in_buf: &[u8], writer: &mut W) -> usize { match in_buf.iter().position(|c| *c == '\n' as u8) { Some(p) => { @@ -203,21 +203,23 @@ fn write_to_end(in_buf: &[u8], writer: &mut W) -> usize { } } -fn write_tab_to_end(in_buf: &[u8], writer: &mut W) -> usize { - match in_buf.iter().position(|c| *c == '\n' as u8 || *c == '\t' as u8) { - Some(p) => { - writer.write_all(&in_buf[..p]).unwrap(); - if in_buf[p] == '\n' as u8 { - p + 1 - } else { - writer.write_all("^I".as_bytes()).unwrap(); - write_tab_to_end(&in_buf[p + 1..], writer) +fn write_tab_to_end(mut in_buf: &[u8], writer: &mut W) -> usize { + loop { + match in_buf.iter().position(|c| *c == '\n' as u8 || *c == '\t' as u8) { + Some(p) => { + writer.write_all(&in_buf[..p]).unwrap(); + if in_buf[p] == '\n' as u8 { + return p + 1; + } else { + writer.write_all("^I".as_bytes()).unwrap(); + in_buf = &in_buf[p + 1..]; + } } - } - None => { - writer.write_all(in_buf).unwrap(); - 0 - } + None => { + writer.write_all(in_buf).unwrap(); + return 0; + } + }; } } @@ -241,4 +243,4 @@ fn write_nonprint_to_end(in_buf: &[u8], writer: &mut W, tab: &[u8]) -> count += 1; } if count != in_buf.len() { count + 1 } else { 0 } -} \ No newline at end of file +} From 6d13bcf2da6486feebd8ec20078c9a7926a7e80e Mon Sep 17 00:00:00 2001 From: kevgeniy Date: Fri, 18 Nov 2016 22:30:12 +0300 Subject: [PATCH 7/7] cat: fixed indentation --- src/cat/cat.rs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/cat/cat.rs 
b/src/cat/cat.rs index cbcf774ba..fb0c6cc16 100644 --- a/src/cat/cat.rs +++ b/src/cat/cat.rs @@ -231,15 +231,14 @@ fn write_nonprint_to_end(in_buf: &[u8], writer: &mut W, tab: &[u8]) -> break; } match byte { - 9 => writer.write_all(tab), - 0...8 | 10...31 => writer.write_all(&['^' as u8, byte + 64]), - 32...126 => writer.write_all(&[byte]), - 127 => writer.write_all(&['^' as u8, byte - 64]), - 128...159 => writer.write_all(&['M' as u8, '-' as u8, '^' as u8, byte - 64]), - 160...254 => writer.write_all(&['M' as u8, '-' as u8, byte - 128]), - _ => writer.write_all(&['M' as u8, '-' as u8, '^' as u8, 63]), - } - .unwrap(); + 9 => writer.write_all(tab), + 0...8 | 10...31 => writer.write_all(&['^' as u8, byte + 64]), + 32...126 => writer.write_all(&[byte]), + 127 => writer.write_all(&['^' as u8, byte - 64]), + 128...159 => writer.write_all(&['M' as u8, '-' as u8, '^' as u8, byte - 64]), + 160...254 => writer.write_all(&['M' as u8, '-' as u8, byte - 128]), + _ => writer.write_all(&['M' as u8, '-' as u8, '^' as u8, 63]), + }.unwrap(); count += 1; } if count != in_buf.len() { count + 1 } else { 0 }