1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 03:27:44 +00:00

Merge pull request #2552 from miDeb/cat/show-crlf

cat: show `\r\n` as `^M$` when `-E` is enabled
This commit is contained in:
Sylvestre Ledru 2021-08-09 12:23:59 +02:00 committed by GitHub
commit ee98efd92e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 73 additions and 25 deletions

View file

@ -128,6 +128,12 @@ struct OutputState {
/// Whether the output cursor is at the beginning of a new line /// Whether the output cursor is at the beginning of a new line
at_line_start: bool, at_line_start: bool,
/// Whether we skipped a \r, which still needs to be printed
skipped_carriage_return: bool,
/// Whether we have already printed a blank line
one_blank_kept: bool,
} }
/// Represents an open file handle, stream, or other device /// Represents an open file handle, stream, or other device
@ -373,6 +379,8 @@ fn cat_files(files: Vec<String>, options: &OutputOptions) -> UResult<()> {
let mut state = OutputState { let mut state = OutputState {
line_number: 1, line_number: 1,
at_line_start: true, at_line_start: true,
skipped_carriage_return: false,
one_blank_kept: false,
}; };
let mut error_messages: Vec<String> = Vec::new(); let mut error_messages: Vec<String> = Vec::new();
@ -381,6 +389,9 @@ fn cat_files(files: Vec<String>, options: &OutputOptions) -> UResult<()> {
error_messages.push(format!("{}: {}", path, err)); error_messages.push(format!("{}: {}", path, err));
} }
} }
if state.skipped_carriage_return {
print!("\r");
}
if error_messages.is_empty() { if error_messages.is_empty() {
Ok(()) Ok(())
} else { } else {
@ -458,7 +469,6 @@ fn write_lines<R: Read>(
let mut in_buf = [0; 1024 * 31]; let mut in_buf = [0; 1024 * 31];
let stdout = io::stdout(); let stdout = io::stdout();
let mut writer = stdout.lock(); let mut writer = stdout.lock();
let mut one_blank_kept = false;
while let Ok(n) = handle.reader.read(&mut in_buf) { while let Ok(n) = handle.reader.read(&mut in_buf) {
if n == 0 { if n == 0 {
@ -469,8 +479,13 @@ fn write_lines<R: Read>(
while pos < n { while pos < n {
// skip empty line_number enumerating them if needed // skip empty line_number enumerating them if needed
if in_buf[pos] == b'\n' { if in_buf[pos] == b'\n' {
if !state.at_line_start || !options.squeeze_blank || !one_blank_kept { // \r followed by \n is printed as ^M when show_ends is enabled, so that \r\n prints as ^M$
one_blank_kept = true; if state.skipped_carriage_return && options.show_ends {
writer.write_all(b"^M")?;
state.skipped_carriage_return = false;
}
if !state.at_line_start || !options.squeeze_blank || !state.one_blank_kept {
state.one_blank_kept = true;
if state.at_line_start && options.number == NumberingMode::All { if state.at_line_start && options.number == NumberingMode::All {
write!(&mut writer, "{0:6}\t", state.line_number)?; write!(&mut writer, "{0:6}\t", state.line_number)?;
state.line_number += 1; state.line_number += 1;
@ -484,7 +499,12 @@ fn write_lines<R: Read>(
pos += 1; pos += 1;
continue; continue;
} }
one_blank_kept = false; if state.skipped_carriage_return {
writer.write_all(b"\r")?;
state.skipped_carriage_return = false;
state.at_line_start = false;
}
state.one_blank_kept = false;
if state.at_line_start && options.number != NumberingMode::None { if state.at_line_start && options.number != NumberingMode::None {
write!(&mut writer, "{0:6}\t", state.line_number)?; write!(&mut writer, "{0:6}\t", state.line_number)?;
state.line_number += 1; state.line_number += 1;
@ -499,17 +519,22 @@ fn write_lines<R: Read>(
write_to_end(&in_buf[pos..], &mut writer) write_to_end(&in_buf[pos..], &mut writer)
}; };
// end of buffer? // end of buffer?
if offset == 0 { if offset + pos == in_buf.len() {
state.at_line_start = false; state.at_line_start = false;
break; break;
} }
// print suitable end of line if in_buf[pos + offset] == b'\r' {
writer.write_all(options.end_of_line().as_bytes())?; state.skipped_carriage_return = true;
if handle.is_interactive { } else {
writer.flush()?; assert_eq!(in_buf[pos + offset], b'\n');
// print suitable end of line
writer.write_all(options.end_of_line().as_bytes())?;
if handle.is_interactive {
writer.flush()?;
}
state.at_line_start = true;
} }
state.at_line_start = true; pos += offset + 1;
pos += offset;
} }
} }
@ -517,17 +542,19 @@ fn write_lines<R: Read>(
} }
// write***_to_end methods // write***_to_end methods
/// Writes the leading bytes of `in_buf` to `writer`, stopping before the first
/// `\n` or `\r`, or at the end of the buffer if neither byte occurs.
///
/// The delimiter itself is never written. We need to stop at `\r` because it
/// may be written as `^M` depending on the byte after it and the settings;
/// `write_nonprint_to_end` doesn't need to stop at `\r` because it will always
/// write `\r` as `^M`.
///
/// Returns the number of bytes written. This is the index of the delimiter
/// within `in_buf`, or `in_buf.len()` when no delimiter was found.
///
/// # Panics
///
/// Panics if writing to `writer` fails.
fn write_to_end<W: Write>(in_buf: &[u8], writer: &mut W) -> usize {
    // Index of the first line-ending byte; the whole buffer if there is none.
    let end = in_buf
        .iter()
        .position(|&c| c == b'\n' || c == b'\r')
        .unwrap_or(in_buf.len());
    writer.write_all(&in_buf[..end]).unwrap();
    end
}
@ -535,20 +562,25 @@ fn write_to_end<W: Write>(in_buf: &[u8], writer: &mut W) -> usize {
fn write_tab_to_end<W: Write>(mut in_buf: &[u8], writer: &mut W) -> usize { fn write_tab_to_end<W: Write>(mut in_buf: &[u8], writer: &mut W) -> usize {
let mut count = 0; let mut count = 0;
loop { loop {
match in_buf.iter().position(|c| *c == b'\n' || *c == b'\t') { match in_buf
.iter()
.position(|c| *c == b'\n' || *c == b'\t' || *c == b'\r')
{
Some(p) => { Some(p) => {
writer.write_all(&in_buf[..p]).unwrap(); writer.write_all(&in_buf[..p]).unwrap();
if in_buf[p] == b'\n' { if in_buf[p] == b'\n' {
return count + p + 1; return count + p;
} else { } else if in_buf[p] == b'\t' {
writer.write_all(b"^I").unwrap(); writer.write_all(b"^I").unwrap();
in_buf = &in_buf[p + 1..]; in_buf = &in_buf[p + 1..];
count += p + 1; count += p + 1;
} else {
return count + p;
} }
} }
None => { None => {
writer.write_all(in_buf).unwrap(); writer.write_all(in_buf).unwrap();
return 0; return in_buf.len();
} }
}; };
} }
@ -573,11 +605,7 @@ fn write_nonprint_to_end<W: Write>(in_buf: &[u8], writer: &mut W, tab: &[u8]) ->
.unwrap(); .unwrap();
count += 1; count += 1;
} }
if count != in_buf.len() { count
count + 1
} else {
0
}
} }
#[cfg(test)] #[cfg(test)]

View file

@ -273,6 +273,26 @@ fn test_stdin_show_ends() {
} }
} }
#[test]
fn squeeze_all_files() {
    // With `-s`, the blank line ending the first file and the blank lines
    // starting the second file are squeezed together into one blank line.
    let (fixtures, mut cat) = at_and_ucmd!();
    for &(name, contents) in &[("input1", "a\n\n"), ("input2", "\n\nb")] {
        fixtures.write(name, contents);
    }
    cat.args(&["input1", "input2", "-s"])
        .succeeds()
        .stdout_only("a\n\nb");
}
#[test]
fn test_show_ends_crlf() {
    // With `-E`, `\r\n` is shown as `^M$`, while a `\r` that is not followed
    // by `\n` (mid-line or at the very end of input) is passed through as-is.
    let input = "a\nb\r\n\rc\n\r\n\r";
    let expected = "a$\nb^M$\n\rc$\n^M$\n\r";
    new_ucmd!()
        .arg("-E")
        .pipe_in(input)
        .succeeds()
        .stdout_only(expected);
}
#[test] #[test]
fn test_stdin_show_all() { fn test_stdin_show_all() {
for same_param in &["-A", "--show-all"] { for same_param in &["-A", "--show-all"] {