1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 03:27:44 +00:00

csplit: don't add a newline if the file doesn't end with one (#7901)

* csplit: don't add a newline if the file doesn't end with one

* refactor test

* refactor
This commit is contained in:
Jeremy Smart 2025-05-09 03:15:54 -04:00 committed by GitHub
parent bcc02e9cea
commit a752f73476
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 63 additions and 21 deletions

View file

@ -6,7 +6,7 @@
#![allow(rustdoc::private_intra_doc_links)] #![allow(rustdoc::private_intra_doc_links)]
use std::cmp::Ordering; use std::cmp::Ordering;
use std::io::{self, BufReader}; use std::io::{self, BufReader, ErrorKind};
use std::{ use std::{
fs::{File, remove_file}, fs::{File, remove_file},
io::{BufRead, BufWriter, Write}, io::{BufRead, BufWriter, Write},
@ -71,6 +71,35 @@ impl CsplitOptions {
} }
} }
/// Line iterator over a buffered reader that keeps each line's terminator.
///
/// Every yielded `String` is the exact text read up to and including the
/// next `b'\n'`, so a preceding `'\r'` is kept as well. If the input does not
/// end with a newline, the last item is returned without one.
pub struct LinesWithNewlines<T: BufRead> {
    inner: T,
}

impl<T: BufRead> LinesWithNewlines<T> {
    /// Wraps the reader `s` so it can be consumed line by line.
    fn new(s: T) -> Self {
        Self { inner: s }
    }
}

impl<T: BufRead> Iterator for LinesWithNewlines<T> {
    type Item = io::Result<String>;

    /// Reads the next line, terminator included.
    ///
    /// Returns `None` once the reader reports end of input (zero bytes read).
    ///
    /// # Errors
    ///
    /// Yields the reader's own error if the read fails, or an
    /// [`ErrorKind::InvalidData`] error if the bytes read are not valid UTF-8.
    fn next(&mut self) -> Option<Self::Item> {
        let mut raw = Vec::new();

        let bytes_read = match self.inner.read_until(b'\n', &mut raw) {
            Ok(count) => count,
            Err(e) => return Some(Err(e)),
        };

        // Zero bytes read means the underlying reader is exhausted.
        if bytes_read == 0 {
            return None;
        }

        let text = String::from_utf8(raw).map_err(|_| {
            io::Error::new(ErrorKind::InvalidData, "stream did not contain valid UTF-8")
        });
        Some(text)
    }
}
/// Splits a file into several files according to the command line patterns. /// Splits a file into several files according to the command line patterns.
/// ///
/// # Errors /// # Errors
@ -87,8 +116,7 @@ pub fn csplit<T>(options: &CsplitOptions, patterns: &[String], input: T) -> Resu
where where
T: BufRead, T: BufRead,
{ {
let enumerated_input_lines = input let enumerated_input_lines = LinesWithNewlines::new(input)
.lines()
.map(|line| line.map_err_context(|| "read error".to_string())) .map(|line| line.map_err_context(|| "read error".to_string()))
.enumerate(); .enumerate();
let mut input_iter = InputSplitter::new(enumerated_input_lines); let mut input_iter = InputSplitter::new(enumerated_input_lines);
@ -243,7 +271,7 @@ impl SplitWriter<'_> {
self.dev_null = true; self.dev_null = true;
} }
/// Writes the line to the current split, appending a newline character. /// Writes the line to the current split.
/// If [`self.dev_null`] is true, then the line is discarded. /// If [`self.dev_null`] is true, then the line is discarded.
/// ///
/// # Errors /// # Errors
@ -255,8 +283,7 @@ impl SplitWriter<'_> {
Some(ref mut current_writer) => { Some(ref mut current_writer) => {
let bytes = line.as_bytes(); let bytes = line.as_bytes();
current_writer.write_all(bytes)?; current_writer.write_all(bytes)?;
current_writer.write_all(b"\n")?; self.size += bytes.len();
self.size += bytes.len() + 1;
} }
None => panic!("trying to write to a split that was not created"), None => panic!("trying to write to a split that was not created"),
} }
@ -321,11 +348,11 @@ impl SplitWriter<'_> {
let mut ret = Err(CsplitError::LineOutOfRange(pattern_as_str.to_string())); let mut ret = Err(CsplitError::LineOutOfRange(pattern_as_str.to_string()));
while let Some((ln, line)) = input_iter.next() { while let Some((ln, line)) = input_iter.next() {
let l = line?; let line = line?;
match n.cmp(&(&ln + 1)) { match n.cmp(&(&ln + 1)) {
Ordering::Less => { Ordering::Less => {
assert!( assert!(
input_iter.add_line_to_buffer(ln, l).is_none(), input_iter.add_line_to_buffer(ln, line).is_none(),
"the buffer is big enough to contain 1 line" "the buffer is big enough to contain 1 line"
); );
ret = Ok(()); ret = Ok(());
@ -334,7 +361,7 @@ impl SplitWriter<'_> {
Ordering::Equal => { Ordering::Equal => {
assert!( assert!(
self.options.suppress_matched self.options.suppress_matched
|| input_iter.add_line_to_buffer(ln, l).is_none(), || input_iter.add_line_to_buffer(ln, line).is_none(),
"the buffer is big enough to contain 1 line" "the buffer is big enough to contain 1 line"
); );
ret = Ok(()); ret = Ok(());
@ -342,7 +369,7 @@ impl SplitWriter<'_> {
} }
Ordering::Greater => (), Ordering::Greater => (),
} }
self.writeln(&l)?; self.writeln(&line)?;
} }
self.finish_split(); self.finish_split();
ret ret
@ -379,23 +406,26 @@ impl SplitWriter<'_> {
input_iter.set_size_of_buffer(1); input_iter.set_size_of_buffer(1);
while let Some((ln, line)) = input_iter.next() { while let Some((ln, line)) = input_iter.next() {
let l = line?; let line = line?;
if regex.is_match(&l) { let l = line
.strip_suffix("\r\n")
.unwrap_or_else(|| line.strip_suffix('\n').unwrap_or(&line));
if regex.is_match(l) {
let mut next_line_suppress_matched = false; let mut next_line_suppress_matched = false;
match (self.options.suppress_matched, offset) { match (self.options.suppress_matched, offset) {
// no offset, add the line to the next split // no offset, add the line to the next split
(false, 0) => { (false, 0) => {
assert!( assert!(
input_iter.add_line_to_buffer(ln, l).is_none(), input_iter.add_line_to_buffer(ln, line).is_none(),
"the buffer is big enough to contain 1 line" "the buffer is big enough to contain 1 line"
); );
} }
// a positive offset, some more lines need to be added to the current split // a positive offset, some more lines need to be added to the current split
(false, _) => self.writeln(&l)?, (false, _) => self.writeln(&line)?,
// suppress matched option true, but there is a positive offset, so the line is printed // suppress matched option true, but there is a positive offset, so the line is printed
(true, 1..) => { (true, 1..) => {
next_line_suppress_matched = true; next_line_suppress_matched = true;
self.writeln(&l)?; self.writeln(&line)?;
} }
_ => (), _ => (),
}; };
@ -424,7 +454,7 @@ impl SplitWriter<'_> {
} }
return Ok(()); return Ok(());
} }
self.writeln(&l)?; self.writeln(&line)?;
} }
} else { } else {
// With a negative offset we use a buffer to keep the lines within the offset. // With a negative offset we use a buffer to keep the lines within the offset.
@ -435,8 +465,11 @@ impl SplitWriter<'_> {
let offset_usize = -offset as usize; let offset_usize = -offset as usize;
input_iter.set_size_of_buffer(offset_usize); input_iter.set_size_of_buffer(offset_usize);
while let Some((ln, line)) = input_iter.next() { while let Some((ln, line)) = input_iter.next() {
let l = line?; let line = line?;
if regex.is_match(&l) { let l = line
.strip_suffix("\r\n")
.unwrap_or_else(|| line.strip_suffix('\n').unwrap_or(&line));
if regex.is_match(l) {
for line in input_iter.shrink_buffer_to_size() { for line in input_iter.shrink_buffer_to_size() {
self.writeln(&line)?; self.writeln(&line)?;
} }
@ -444,12 +477,12 @@ impl SplitWriter<'_> {
// since offset_usize is for sure greater than 0 // since offset_usize is for sure greater than 0
// the first element of the buffer should be removed and this // the first element of the buffer should be removed and this
// line inserted to be coherent with GNU implementation // line inserted to be coherent with GNU implementation
input_iter.add_line_to_buffer(ln, l); input_iter.add_line_to_buffer(ln, line);
} else { } else {
// add 1 to the buffer size to make place for the matched line // add 1 to the buffer size to make place for the matched line
input_iter.set_size_of_buffer(offset_usize + 1); input_iter.set_size_of_buffer(offset_usize + 1);
assert!( assert!(
input_iter.add_line_to_buffer(ln, l).is_none(), input_iter.add_line_to_buffer(ln, line).is_none(),
"should be big enough to hold every lines" "should be big enough to hold every lines"
); );
} }
@ -460,7 +493,7 @@ impl SplitWriter<'_> {
} }
return Ok(()); return Ok(());
} }
if let Some(line) = input_iter.add_line_to_buffer(ln, l) { if let Some(line) = input_iter.add_line_to_buffer(ln, line) {
self.writeln(&line)?; self.writeln(&line)?;
} }
} }

View file

@ -1476,3 +1476,12 @@ fn test_directory_input_file() {
.fails_with_code(1) .fails_with_code(1)
.stderr_only("csplit: cannot open 'test_directory' for reading: Permission denied\n"); .stderr_only("csplit: cannot open 'test_directory' for reading: Permission denied\n");
} }
// Regression test: input piped on stdin that does not end with a newline
// must not gain one in the output.
#[test]
fn test_stdin_no_trailing_newline() {
new_ucmd!()
// "-" selects stdin as the input; "2" is the split pattern (line number 2).
.args(&["-", "2"])
// Four lines, the last one ("d") deliberately without a trailing newline.
.pipe_in("a\nb\nc\nd")
.succeeds()
// The expected values match the byte lengths of the two pieces:
// "a\n" is 2 bytes and "b\nc\nd" is 5 bytes. A newline appended to the
// final line would make the second value 6.
.stdout_only("2\n5\n");
}