From fa44957a63e367271ef5455d025b08036a34b3b6 Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Sun, 30 Jan 2022 22:03:17 -0500 Subject: [PATCH 1/7] paste: Handle unicode delimiters --- src/uu/paste/src/paste.rs | 10 ++++++---- tests/by-util/test_paste.rs | 12 ++++++++++++ 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/src/uu/paste/src/paste.rs b/src/uu/paste/src/paste.rs index 0792da458..5208f1b5a 100644 --- a/src/uu/paste/src/paste.rs +++ b/src/uu/paste/src/paste.rs @@ -10,7 +10,6 @@ use clap::{crate_version, App, AppSettings, Arg}; use std::fs::File; use std::io::{stdin, BufRead, BufReader, Read}; -use std::iter::repeat; use std::path::Path; use uucore::error::{FromIo, UResult}; @@ -110,10 +109,11 @@ fn paste(filenames: Vec<String>, serial: bool, delimiters: &str) -> UResult<()> } delim_count += 1; } - println!("{}", &output[..output.len() - 1]); + output.pop(); + println!("{}", output); } } else { - let mut eof: Vec<bool> = repeat(false).take(files.len()).collect(); + let mut eof = vec![false; files.len()]; loop { let mut output = String::new(); let mut eof_count = 0; @@ -137,7 +137,9 @@ fn paste(filenames: Vec<String>, serial: bool, delimiters: &str) -> UResult<()> if files.len() == eof_count { break; } - println!("{}", &output[..output.len() - 1]); + // Remove final delimiter + output.pop(); + println!("{}", output); delim_count = 0; } } diff --git a/tests/by-util/test_paste.rs b/tests/by-util/test_paste.rs index 1afe84be8..5363e6962 100644 --- a/tests/by-util/test_paste.rs +++ b/tests/by-util/test_paste.rs @@ -60,6 +60,18 @@ static EXAMPLE_DATA: &[TestData] = &[ ins: &["1\na\n", "2\nb\n"], out: "1 2\na b\n", }, + TestData { + name: "multibyte-delim", + args: &["-d", "💣"], + ins: &["1\na\n", "2\nb\n"], + out: "1💣2\na💣b\n", + }, + TestData { + name: "multibyte-delim-serial", + args: &["-d", "💣", "-s"], + ins: &["1\na\n", "2\nb\n"], + out: "1💣a\n2💣b\n", + }, ]; #[test] From c6ec4f8f170e1d29ae0580db9111c10e23edd999 Mon Sep 17 00:00:00 2001 From:
Zachary Dremann Date: Sun, 30 Jan 2022 22:06:33 -0500 Subject: [PATCH 2/7] paste: Store delimiters as chars, rather than strings --- src/uu/paste/src/paste.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/uu/paste/src/paste.rs b/src/uu/paste/src/paste.rs index 5208f1b5a..c43a86b75 100644 --- a/src/uu/paste/src/paste.rs +++ b/src/uu/paste/src/paste.rs @@ -88,10 +88,7 @@ fn paste(filenames: Vec<String>, serial: bool, delimiters: &str) -> UResult<()> files.push(file); } - let delimiters: Vec<String> = unescape(delimiters) - .chars() - .map(|x| x.to_string()) - .collect(); + let delimiters: Vec<char> = unescape(delimiters).chars().collect(); let mut delim_count = 0; if serial { @@ -103,7 +100,7 @@ fn paste(filenames: Vec<String>, serial: bool, delimiters: &str) -> UResult<()> Ok(0) => break, Ok(_) => { output.push_str(line.trim_end()); - output.push_str(&delimiters[delim_count % delimiters.len()]); + output.push(delimiters[delim_count % delimiters.len()]); } Err(e) => return Err(e.map_err_context(String::new)), } @@ -131,7 +128,7 @@ fn paste(filenames: Vec<String>, serial: bool, delimiters: &str) -> UResult<()> Err(e) => return Err(e.map_err_context(String::new)), } } - output.push_str(&delimiters[delim_count % delimiters.len()]); + output.push(delimiters[delim_count % delimiters.len()]); delim_count += 1; } if files.len() == eof_count { From 8905d52279c4ac1a737767a815d09c179e0798cd Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Sun, 30 Jan 2022 22:26:13 -0500 Subject: [PATCH 3/7] paste: Write to a locked stdout --- src/uu/paste/src/paste.rs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/uu/paste/src/paste.rs b/src/uu/paste/src/paste.rs index c43a86b75..dbc0bde76 100644 --- a/src/uu/paste/src/paste.rs +++ b/src/uu/paste/src/paste.rs @@ -9,7 +9,7 @@ use clap::{crate_version, App, AppSettings, Arg}; use std::fs::File; -use std::io::{stdin, BufRead, BufReader, Read}; +use std::io::{stdin, BufRead, BufReader, Read, stdout, Write}; use
std::path::Path; use uucore::error::{FromIo, UResult}; @@ -90,12 +90,15 @@ fn paste(filenames: Vec<String>, serial: bool, delimiters: &str) -> UResult<()> let delimiters: Vec<char> = unescape(delimiters).chars().collect(); let mut delim_count = 0; + let stdout = stdout(); + let mut stdout = stdout.lock(); + let mut line = String::new(); if serial { for file in &mut files { let mut output = String::new(); loop { - let mut line = String::new(); + line.clear(); match read_line(file.as_mut(), &mut line) { Ok(0) => break, Ok(_) => { @@ -107,7 +110,7 @@ fn paste(filenames: Vec<String>, serial: bool, delimiters: &str) -> UResult<()> delim_count += 1; } output.pop(); - println!("{}", output); + writeln!(stdout, "{}", output)?; } } else { let mut eof = vec![false; files.len()]; @@ -118,7 +121,7 @@ fn paste(filenames: Vec<String>, serial: bool, delimiters: &str) -> UResult<()> if eof[i] { eof_count += 1; } else { - let mut line = String::new(); + line.clear(); match read_line(file.as_mut(), &mut line) { Ok(0) => { eof[i] = true; @@ -136,7 +139,7 @@ fn paste(filenames: Vec<String>, serial: bool, delimiters: &str) -> UResult<()> } // Remove final delimiter output.pop(); - println!("{}", output); + writeln!(stdout, "{}", output)?; delim_count = 0; } } From ff14f25c34d4d22f3f6d031ab88b94a6a9115a0f Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Sun, 30 Jan 2022 22:27:22 -0500 Subject: [PATCH 4/7] paste: Reuse `output` allocation --- src/uu/paste/src/paste.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/uu/paste/src/paste.rs b/src/uu/paste/src/paste.rs index dbc0bde76..db5fa5b8a 100644 --- a/src/uu/paste/src/paste.rs +++ b/src/uu/paste/src/paste.rs @@ -94,9 +94,10 @@ fn paste(filenames: Vec<String>, serial: bool, delimiters: &str) -> UResult<()> let mut stdout = stdout.lock(); let mut line = String::new(); + let mut output = String::new(); if serial { for file in &mut files { - let mut output = String::new(); + output.clear(); loop { line.clear(); match read_line(file.as_mut(), &mut line) {
@@ -115,7 +116,7 @@ fn paste(filenames: Vec<String>, serial: bool, delimiters: &str) -> UResult<()> } else { let mut eof = vec![false; files.len()]; loop { - let mut output = String::new(); + output.clear(); let mut eof_count = 0; for (i, file) in files.iter_mut().enumerate() { if eof[i] { From 85a81d328a9e2c40c0c2682a11071c1634a1a204 Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Sun, 30 Jan 2022 22:28:05 -0500 Subject: [PATCH 5/7] paste: Create vec with capacity --- src/uu/paste/src/paste.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/uu/paste/src/paste.rs b/src/uu/paste/src/paste.rs index db5fa5b8a..6051fbeed 100644 --- a/src/uu/paste/src/paste.rs +++ b/src/uu/paste/src/paste.rs @@ -9,7 +9,7 @@ use clap::{crate_version, App, AppSettings, Arg}; use std::fs::File; -use std::io::{stdin, BufRead, BufReader, Read, stdout, Write}; +use std::io::{stdin, stdout, BufRead, BufReader, Read, Write}; use std::path::Path; use uucore::error::{FromIo, UResult}; @@ -76,7 +76,7 @@ pub fn uu_app<'a>() -> App<'a> { } fn paste(filenames: Vec<String>, serial: bool, delimiters: &str) -> UResult<()> { - let mut files = vec![]; + let mut files = Vec::with_capacity(filenames.len()); for name in filenames { let file = if name == "-" { None From ad4c5d3357b93c810321b1216454cf061c2c3adc Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Sun, 30 Jan 2022 22:45:50 -0500 Subject: [PATCH 6/7] paste: Use a single buffer --- src/uu/paste/src/paste.rs | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/uu/paste/src/paste.rs b/src/uu/paste/src/paste.rs index 6051fbeed..dc93ae625 100644 --- a/src/uu/paste/src/paste.rs +++ b/src/uu/paste/src/paste.rs @@ -93,17 +93,17 @@ fn paste(filenames: Vec<String>, serial: bool, delimiters: &str) -> UResult<()> let stdout = stdout(); let mut stdout = stdout.lock(); - let mut line = String::new(); let mut output = String::new(); if serial { for file in &mut files { output.clear(); loop { - line.clear(); - match
read_line(file.as_mut(), &mut line) { + match read_line(file.as_mut(), &mut output) { Ok(0) => break, Ok(_) => { - output.push_str(line.trim_end()); + if output.ends_with('\n') { + output.pop(); + } output.push(delimiters[delim_count % delimiters.len()]); } Err(e) => return Err(e.map_err_context(String::new)), @@ -122,13 +122,16 @@ fn paste(filenames: Vec<String>, serial: bool, delimiters: &str) -> UResult<()> if eof[i] { eof_count += 1; } else { - line.clear(); - match read_line(file.as_mut(), &mut line) { + match read_line(file.as_mut(), &mut output) { Ok(0) => { eof[i] = true; eof_count += 1; } - Ok(_) => output.push_str(line.trim_end()), + Ok(_) => { + if output.ends_with('\n') { + output.pop(); + } + } Err(e) => return Err(e.map_err_context(String::new)), } } From f75466bb31f9ac7932582ee821100bf9debc4666 Mon Sep 17 00:00:00 2001 From: Zachary Dremann Date: Sun, 30 Jan 2022 22:49:18 -0500 Subject: [PATCH 7/7] tests/paste: Add test to avoid trimming extra whitespace --- tests/by-util/test_paste.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/by-util/test_paste.rs b/tests/by-util/test_paste.rs index 5363e6962..09401ec59 100644 --- a/tests/by-util/test_paste.rs +++ b/tests/by-util/test_paste.rs @@ -72,6 +72,12 @@ static EXAMPLE_DATA: &[TestData] = &[ ins: &["1\na\n", "2\nb\n"], out: "1💣a\n2💣b\n", }, + TestData { + name: "trailing whitespace", + args: &["-d", "|"], + ins: &["1 \na \n", "2\t\nb\t\n"], + out: "1 |2\t\na |b\t\n", + }, ]; #[test]