1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 11:37:44 +00:00

cat: show \r\n as ^M$ when -E is enabled

This functionality was recently added to GNU cat, but had a bug.
This implementation will be compatible with GNU once the bug in GNU is fixed.
This commit is contained in:
Michael Debertol 2021-08-07 21:05:47 +02:00
parent d967a7a553
commit 5be4c48546
2 changed files with 54 additions and 21 deletions

View file

@ -123,6 +123,9 @@ struct OutputState {
/// Whether the output cursor is at the beginning of a new line /// Whether the output cursor is at the beginning of a new line
at_line_start: bool, at_line_start: bool,
/// Whether we skipped a \r, which still needs to be printed
skipped_carriage_return: bool,
} }
/// Represents an open file handle, stream, or other device /// Represents an open file handle, stream, or other device
@ -339,6 +342,7 @@ fn cat_files(files: Vec<String>, options: &OutputOptions) -> UResult<()> {
let mut state = OutputState { let mut state = OutputState {
line_number: 1, line_number: 1,
at_line_start: true, at_line_start: true,
skipped_carriage_return: false,
}; };
let mut error_messages: Vec<String> = Vec::new(); let mut error_messages: Vec<String> = Vec::new();
@ -347,6 +351,9 @@ fn cat_files(files: Vec<String>, options: &OutputOptions) -> UResult<()> {
error_messages.push(format!("{}: {}", path, err)); error_messages.push(format!("{}: {}", path, err));
} }
} }
if state.skipped_carriage_return {
print!("\r");
}
if error_messages.is_empty() { if error_messages.is_empty() {
Ok(()) Ok(())
} else { } else {
@ -435,6 +442,11 @@ fn write_lines<R: Read>(
while pos < n { while pos < n {
// skip empty line_number enumerating them if needed // skip empty line_number enumerating them if needed
if in_buf[pos] == b'\n' { if in_buf[pos] == b'\n' {
// \r followed by \n is printed as ^M when show_ends is enabled, so that \r\n prints as ^M$
if state.skipped_carriage_return && options.show_ends {
writer.write_all(b"^M")?;
state.skipped_carriage_return = false;
}
if !state.at_line_start || !options.squeeze_blank || !one_blank_kept { if !state.at_line_start || !options.squeeze_blank || !one_blank_kept {
one_blank_kept = true; one_blank_kept = true;
if state.at_line_start && options.number == NumberingMode::All { if state.at_line_start && options.number == NumberingMode::All {
@ -450,6 +462,11 @@ fn write_lines<R: Read>(
pos += 1; pos += 1;
continue; continue;
} }
if state.skipped_carriage_return {
writer.write_all(b"\r")?;
state.skipped_carriage_return = false;
state.at_line_start = false;
}
one_blank_kept = false; one_blank_kept = false;
if state.at_line_start && options.number != NumberingMode::None { if state.at_line_start && options.number != NumberingMode::None {
write!(&mut writer, "{0:6}\t", state.line_number)?; write!(&mut writer, "{0:6}\t", state.line_number)?;
@ -465,17 +482,22 @@ fn write_lines<R: Read>(
write_to_end(&in_buf[pos..], &mut writer) write_to_end(&in_buf[pos..], &mut writer)
}; };
// end of buffer? // end of buffer?
if offset == 0 { if offset + pos == in_buf.len() {
state.at_line_start = false; state.at_line_start = false;
break; break;
} }
if in_buf[pos + offset] == b'\r' {
state.skipped_carriage_return = true;
} else {
assert_eq!(in_buf[pos + offset], b'\n');
// print suitable end of line // print suitable end of line
writer.write_all(options.end_of_line().as_bytes())?; writer.write_all(options.end_of_line().as_bytes())?;
if handle.is_interactive { if handle.is_interactive {
writer.flush()?; writer.flush()?;
} }
state.at_line_start = true; state.at_line_start = true;
pos += offset; }
pos += offset + 1;
} }
} }
@ -483,17 +505,19 @@ fn write_lines<R: Read>(
} }
// write***_to_end methods // write***_to_end methods
// Write all symbols till end of line or end of buffer is reached // Write all symbols till \n or \r or end of buffer is reached
// Return the (number of written symbols + 1) or 0 if the end of buffer is reached // We need to stop at \r because it may be written as ^M depending on the byte after and settings;
// however, write_nonprint_to_end doesn't need to stop at \r because it will always write \r as ^M.
// Return the number of written symbols
fn write_to_end<W: Write>(in_buf: &[u8], writer: &mut W) -> usize { fn write_to_end<W: Write>(in_buf: &[u8], writer: &mut W) -> usize {
match in_buf.iter().position(|c| *c == b'\n') { match in_buf.iter().position(|c| *c == b'\n' || *c == b'\r') {
Some(p) => { Some(p) => {
writer.write_all(&in_buf[..p]).unwrap(); writer.write_all(&in_buf[..p]).unwrap();
p + 1 p
} }
None => { None => {
writer.write_all(in_buf).unwrap(); writer.write_all(in_buf).unwrap();
0 in_buf.len()
} }
} }
} }
@ -501,20 +525,25 @@ fn write_to_end<W: Write>(in_buf: &[u8], writer: &mut W) -> usize {
fn write_tab_to_end<W: Write>(mut in_buf: &[u8], writer: &mut W) -> usize { fn write_tab_to_end<W: Write>(mut in_buf: &[u8], writer: &mut W) -> usize {
let mut count = 0; let mut count = 0;
loop { loop {
match in_buf.iter().position(|c| *c == b'\n' || *c == b'\t') { match in_buf
.iter()
.position(|c| *c == b'\n' || *c == b'\t' || *c == b'\r')
{
Some(p) => { Some(p) => {
writer.write_all(&in_buf[..p]).unwrap(); writer.write_all(&in_buf[..p]).unwrap();
if in_buf[p] == b'\n' { if in_buf[p] == b'\n' {
return count + p + 1; return count + p;
} else { } else if in_buf[p] == b'\t' {
writer.write_all(b"^I").unwrap(); writer.write_all(b"^I").unwrap();
in_buf = &in_buf[p + 1..]; in_buf = &in_buf[p + 1..];
count += p + 1; count += p + 1;
} else {
return count + p;
} }
} }
None => { None => {
writer.write_all(in_buf).unwrap(); writer.write_all(in_buf).unwrap();
return 0; return in_buf.len();
} }
}; };
} }
@ -539,11 +568,7 @@ fn write_nonprint_to_end<W: Write>(in_buf: &[u8], writer: &mut W, tab: &[u8]) ->
.unwrap(); .unwrap();
count += 1; count += 1;
} }
if count != in_buf.len() { count
count + 1
} else {
0
}
} }
#[cfg(test)] #[cfg(test)]

View file

@ -273,6 +273,14 @@ fn test_stdin_show_ends() {
.stdout_only("\t\0$\n\t"); .stdout_only("\t\0$\n\t");
} }
} }
// Checks `-E` handling of carriage returns: a `\r` immediately followed by `\n`
// is expected to be shown as `^M` (so `\r\n` becomes `^M$\n`), while a `\r`
// followed by any other byte, or by the end of input, is expected unchanged.
#[test]
fn test_show_ends_crlf() {
new_ucmd!()
.arg("-E")
// Input covers: plain `\n`, `\r\n` after text, `\r` before text, a bare `\r\n` line, and a trailing `\r`.
.pipe_in("a\nb\r\n\rc\n\r\n\r")
.succeeds()
.stdout_only("a$\nb^M$\n\rc$\n^M$\n\r");
}
#[test] #[test]
fn test_stdin_show_all() { fn test_stdin_show_all() {