From 5be4c48546b58e8ce591ec37823eb3ca69858a02 Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Sat, 7 Aug 2021 21:05:47 +0200 Subject: [PATCH 1/2] cat: show \r\n as ^M$ when -E is enabled This functionality was recently added to GNU cat, but had a bug. This implementation will be compatible with GNU once the bug in GNU is fixed. --- src/uu/cat/src/cat.rs | 67 +++++++++++++++++++++++++++------------ tests/by-util/test_cat.rs | 8 +++++ 2 files changed, 54 insertions(+), 21 deletions(-) diff --git a/src/uu/cat/src/cat.rs b/src/uu/cat/src/cat.rs index f340fa9fa..6b6f68b13 100644 --- a/src/uu/cat/src/cat.rs +++ b/src/uu/cat/src/cat.rs @@ -123,6 +123,9 @@ struct OutputState { /// Whether the output cursor is at the beginning of a new line at_line_start: bool, + + /// Whether we skipped a \r, which still needs to be printed + skipped_carriage_return: bool, } /// Represents an open file handle, stream, or other device @@ -339,6 +342,7 @@ fn cat_files(files: Vec, options: &OutputOptions) -> UResult<()> { let mut state = OutputState { line_number: 1, at_line_start: true, + skipped_carriage_return: false, }; let mut error_messages: Vec = Vec::new(); @@ -347,6 +351,9 @@ fn cat_files(files: Vec, options: &OutputOptions) -> UResult<()> { error_messages.push(format!("{}: {}", path, err)); } } + if state.skipped_carriage_return { + print!("\r"); + } if error_messages.is_empty() { Ok(()) } else { @@ -435,6 +442,11 @@ fn write_lines( while pos < n { // skip empty line_number enumerating them if needed if in_buf[pos] == b'\n' { + // \r followed by \n is printed as ^M when show_ends is enabled, so that \r\n prints as ^M$ + if state.skipped_carriage_return && options.show_ends { + writer.write_all(b"^M")?; + state.skipped_carriage_return = false; + } if !state.at_line_start || !options.squeeze_blank || !one_blank_kept { one_blank_kept = true; if state.at_line_start && options.number == NumberingMode::All { @@ -450,6 +462,11 @@ fn write_lines( pos += 1; continue; } + if 
state.skipped_carriage_return { + writer.write_all(b"\r")?; + state.skipped_carriage_return = false; + state.at_line_start = false; + } one_blank_kept = false; if state.at_line_start && options.number != NumberingMode::None { write!(&mut writer, "{0:6}\t", state.line_number)?; @@ -465,17 +482,22 @@ fn write_lines( write_to_end(&in_buf[pos..], &mut writer) }; // end of buffer? - if offset == 0 { + if offset + pos == in_buf.len() { state.at_line_start = false; break; } - // print suitable end of line - writer.write_all(options.end_of_line().as_bytes())?; - if handle.is_interactive { - writer.flush()?; + if in_buf[pos + offset] == b'\r' { + state.skipped_carriage_return = true; + } else { + assert_eq!(in_buf[pos + offset], b'\n'); + // print suitable end of line + writer.write_all(options.end_of_line().as_bytes())?; + if handle.is_interactive { + writer.flush()?; + } + state.at_line_start = true; } - state.at_line_start = true; - pos += offset; + pos += offset + 1; } } @@ -483,17 +505,19 @@ fn write_lines( } // write***_to_end methods -// Write all symbols till end of line or end of buffer is reached -// Return the (number of written symbols + 1) or 0 if the end of buffer is reached +// Write all symbols till \n or \r or end of buffer is reached +// We need to stop at \r because it may be written as ^M depending on the byte after and settings; +// however, write_nonprint_to_end doesn't need to stop at \r because it will always write \r as ^M. 
+// Return the number of written symbols fn write_to_end(in_buf: &[u8], writer: &mut W) -> usize { - match in_buf.iter().position(|c| *c == b'\n') { + match in_buf.iter().position(|c| *c == b'\n' || *c == b'\r') { Some(p) => { writer.write_all(&in_buf[..p]).unwrap(); - p + 1 + p } None => { writer.write_all(in_buf).unwrap(); - 0 + in_buf.len() } } } @@ -501,20 +525,25 @@ fn write_to_end(in_buf: &[u8], writer: &mut W) -> usize { fn write_tab_to_end(mut in_buf: &[u8], writer: &mut W) -> usize { let mut count = 0; loop { - match in_buf.iter().position(|c| *c == b'\n' || *c == b'\t') { + match in_buf + .iter() + .position(|c| *c == b'\n' || *c == b'\t' || *c == b'\r') + { Some(p) => { writer.write_all(&in_buf[..p]).unwrap(); if in_buf[p] == b'\n' { - return count + p + 1; - } else { + return count + p; + } else if in_buf[p] == b'\t' { writer.write_all(b"^I").unwrap(); in_buf = &in_buf[p + 1..]; count += p + 1; + } else { + return count + p; } } None => { writer.write_all(in_buf).unwrap(); - return 0; + return in_buf.len(); } }; } @@ -539,11 +568,7 @@ fn write_nonprint_to_end(in_buf: &[u8], writer: &mut W, tab: &[u8]) -> .unwrap(); count += 1; } - if count != in_buf.len() { - count + 1 - } else { - 0 - } + count } #[cfg(test)] diff --git a/tests/by-util/test_cat.rs b/tests/by-util/test_cat.rs index d83b5515b..eb50d7dce 100644 --- a/tests/by-util/test_cat.rs +++ b/tests/by-util/test_cat.rs @@ -273,6 +273,14 @@ fn test_stdin_show_ends() { .stdout_only("\t\0$\n\t"); } } +#[test] +fn test_show_ends_crlf() { + new_ucmd!() + .arg("-E") + .pipe_in("a\nb\r\n\rc\n\r\n\r") + .succeeds() + .stdout_only("a$\nb^M$\n\rc$\n^M$\n\r"); +} #[test] fn test_stdin_show_all() { From 722936021747ec25df26f32e1aa17e3fe07c33b2 Mon Sep 17 00:00:00 2001 From: Michael Debertol Date: Sat, 7 Aug 2021 21:31:05 +0200 Subject: [PATCH 2/2] cat: remove all per-file state cat cannot keep per-file state, so move all remaining state (one_blank_kept) to the global state. 
--- src/uu/cat/src/cat.rs | 11 +++++++---- tests/by-util/test_cat.rs | 12 ++++++++++++ 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/src/uu/cat/src/cat.rs b/src/uu/cat/src/cat.rs index 6b6f68b13..c71f60a7e 100644 --- a/src/uu/cat/src/cat.rs +++ b/src/uu/cat/src/cat.rs @@ -126,6 +126,9 @@ struct OutputState { /// Whether we skipped a \r, which still needs to be printed skipped_carriage_return: bool, + + /// Whether we have already printed a blank line + one_blank_kept: bool, } /// Represents an open file handle, stream, or other device @@ -343,6 +346,7 @@ fn cat_files(files: Vec, options: &OutputOptions) -> UResult<()> { line_number: 1, at_line_start: true, skipped_carriage_return: false, + one_blank_kept: false, }; let mut error_messages: Vec = Vec::new(); @@ -431,7 +435,6 @@ fn write_lines( let mut in_buf = [0; 1024 * 31]; let stdout = io::stdout(); let mut writer = stdout.lock(); - let mut one_blank_kept = false; while let Ok(n) = handle.reader.read(&mut in_buf) { if n == 0 { @@ -447,8 +450,8 @@ fn write_lines( writer.write_all(b"^M")?; state.skipped_carriage_return = false; } - if !state.at_line_start || !options.squeeze_blank || !one_blank_kept { - one_blank_kept = true; + if !state.at_line_start || !options.squeeze_blank || !state.one_blank_kept { + state.one_blank_kept = true; if state.at_line_start && options.number == NumberingMode::All { write!(&mut writer, "{0:6}\t", state.line_number)?; state.line_number += 1; @@ -467,7 +470,7 @@ fn write_lines( state.skipped_carriage_return = false; state.at_line_start = false; } - one_blank_kept = false; + state.one_blank_kept = false; if state.at_line_start && options.number != NumberingMode::None { write!(&mut writer, "{0:6}\t", state.line_number)?; state.line_number += 1; diff --git a/tests/by-util/test_cat.rs b/tests/by-util/test_cat.rs index eb50d7dce..e0bc49339 100644 --- a/tests/by-util/test_cat.rs +++ b/tests/by-util/test_cat.rs @@ -273,6 +273,18 @@ fn test_stdin_show_ends() { 
.stdout_only("\t\0$\n\t"); } } + +#[test] +fn squeeze_all_files() { + // empty lines at the end of a file are "squeezed" together with empty lines at the beginning of the next file + let (at, mut ucmd) = at_and_ucmd!(); + at.write("input1", "a\n\n"); + at.write("input2", "\n\nb"); + ucmd.args(&["input1", "input2", "-s"]) + .succeeds() + .stdout_only("a\n\nb"); +} + #[test] fn test_show_ends_crlf() { new_ucmd!()