1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-28 03:27:44 +00:00

comm: implement the ordering check

A few comments:
* skip if the two args are pointing to the same file
* skip if the same content in the two files
* implement --check-order
* implement --nocheck-order
* output the right things on stderr

Should pass: tests/misc/comm
This commit is contained in:
Sylvestre Ledru 2025-01-15 21:18:04 +01:00
parent dca0ec0e65
commit b50a012d13
3 changed files with 291 additions and 24 deletions

View file

@ -18,7 +18,7 @@ path = "src/comm.rs"
[dependencies] [dependencies]
clap = { workspace = true } clap = { workspace = true }
uucore = { workspace = true } uucore = { workspace = true, features = ["fs"] }
[[bin]] [[bin]]
name = "comm" name = "comm"

View file

@ -3,12 +3,13 @@
// For the full copyright and license information, please view the LICENSE // For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code. // file that was distributed with this source code.
// spell-checker:ignore (ToDO) delim mkdelim // spell-checker:ignore (ToDO) delim mkdelim pairable
use std::cmp::Ordering; use std::cmp::Ordering;
use std::fs::{metadata, File}; use std::fs::{metadata, File};
use std::io::{self, stdin, BufRead, BufReader, Stdin}; use std::io::{self, stdin, BufRead, BufReader, Read, Stdin};
use uucore::error::{FromIo, UResult, USimpleError}; use uucore::error::{FromIo, UResult, USimpleError};
use uucore::fs::paths_refer_to_same_file;
use uucore::line_ending::LineEnding; use uucore::line_ending::LineEnding;
use uucore::{format_usage, help_about, help_usage}; use uucore::{format_usage, help_about, help_usage};
@ -27,6 +28,30 @@ mod options {
pub const FILE_2: &str = "FILE2"; pub const FILE_2: &str = "FILE2";
pub const TOTAL: &str = "total"; pub const TOTAL: &str = "total";
pub const ZERO_TERMINATED: &str = "zero-terminated"; pub const ZERO_TERMINATED: &str = "zero-terminated";
pub const CHECK_ORDER: &str = "check-order";
pub const NO_CHECK_ORDER: &str = "nocheck-order";
}
#[derive(Debug, Clone, Copy)]
enum FileNumber {
One,
Two,
}
impl FileNumber {
fn as_str(&self) -> &'static str {
match self {
FileNumber::One => "1",
FileNumber::Two => "2",
}
}
}
struct OrderChecker {
last_line: Vec<u8>,
file_num: FileNumber,
check_order: bool,
has_error: bool,
} }
enum Input { enum Input {
@ -60,7 +85,74 @@ impl LineReader {
} }
} }
fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) { impl OrderChecker {
fn new(file_num: FileNumber, check_order: bool) -> Self {
Self {
last_line: Vec::new(),
file_num,
check_order,
has_error: false,
}
}
fn verify_order(&mut self, current_line: &[u8]) -> bool {
if self.last_line.is_empty() {
self.last_line = current_line.to_vec();
return true;
}
let is_ordered = current_line >= &self.last_line;
if !is_ordered && !self.has_error {
eprintln!(
"comm: file {} is not in sorted order",
self.file_num.as_str()
);
self.has_error = true;
}
self.last_line = current_line.to_vec();
is_ordered || !self.check_order
}
}
// Check if two files are identical by comparing their contents
pub fn are_files_identical(path1: &str, path2: &str) -> io::Result<bool> {
// First compare file sizes
let metadata1 = std::fs::metadata(path1)?;
let metadata2 = std::fs::metadata(path2)?;
if metadata1.len() != metadata2.len() {
return Ok(false);
}
let file1 = File::open(path1)?;
let file2 = File::open(path2)?;
let mut reader1 = BufReader::new(file1);
let mut reader2 = BufReader::new(file2);
let mut buffer1 = [0; 8192];
let mut buffer2 = [0; 8192];
loop {
let bytes1 = reader1.read(&mut buffer1)?;
let bytes2 = reader2.read(&mut buffer2)?;
if bytes1 != bytes2 {
return Ok(false);
}
if bytes1 == 0 {
return Ok(true);
}
if buffer1[..bytes1] != buffer2[..bytes2] {
return Ok(false);
}
}
}
fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches) -> UResult<()> {
let width_col_1 = usize::from(!opts.get_flag(options::COLUMN_1)); let width_col_1 = usize::from(!opts.get_flag(options::COLUMN_1));
let width_col_2 = usize::from(!opts.get_flag(options::COLUMN_2)); let width_col_2 = usize::from(!opts.get_flag(options::COLUMN_2));
@ -76,6 +168,26 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches)
let mut total_col_2 = 0; let mut total_col_2 = 0;
let mut total_col_3 = 0; let mut total_col_3 = 0;
let check_order = opts.get_flag(options::CHECK_ORDER);
let no_check_order = opts.get_flag(options::NO_CHECK_ORDER);
// Determine if we should perform order checking
let should_check_order = !no_check_order
&& (check_order
|| if let (Some(file1), Some(file2)) = (
opts.get_one::<String>(options::FILE_1),
opts.get_one::<String>(options::FILE_2),
) {
!(paths_refer_to_same_file(file1, file2, true)
|| are_files_identical(file1, file2).unwrap_or(false))
} else {
true
});
let mut checker1 = OrderChecker::new(FileNumber::One, check_order);
let mut checker2 = OrderChecker::new(FileNumber::Two, check_order);
let mut input_error = false;
while na.is_ok() || nb.is_ok() { while na.is_ok() || nb.is_ok() {
let ord = match (na.is_ok(), nb.is_ok()) { let ord = match (na.is_ok(), nb.is_ok()) {
(false, true) => Ordering::Greater, (false, true) => Ordering::Greater,
@ -91,6 +203,9 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches)
match ord { match ord {
Ordering::Less => { Ordering::Less => {
if should_check_order && !checker1.verify_order(ra) {
break;
}
if !opts.get_flag(options::COLUMN_1) { if !opts.get_flag(options::COLUMN_1) {
print!("{}", String::from_utf8_lossy(ra)); print!("{}", String::from_utf8_lossy(ra));
} }
@ -99,6 +214,9 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches)
total_col_1 += 1; total_col_1 += 1;
} }
Ordering::Greater => { Ordering::Greater => {
if should_check_order && !checker2.verify_order(rb) {
break;
}
if !opts.get_flag(options::COLUMN_2) { if !opts.get_flag(options::COLUMN_2) {
print!("{delim_col_2}{}", String::from_utf8_lossy(rb)); print!("{delim_col_2}{}", String::from_utf8_lossy(rb));
} }
@ -107,6 +225,10 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches)
total_col_2 += 1; total_col_2 += 1;
} }
Ordering::Equal => { Ordering::Equal => {
if should_check_order && (!checker1.verify_order(ra) || !checker2.verify_order(rb))
{
break;
}
if !opts.get_flag(options::COLUMN_3) { if !opts.get_flag(options::COLUMN_3) {
print!("{delim_col_3}{}", String::from_utf8_lossy(ra)); print!("{delim_col_3}{}", String::from_utf8_lossy(ra));
} }
@ -117,12 +239,27 @@ fn comm(a: &mut LineReader, b: &mut LineReader, delim: &str, opts: &ArgMatches)
total_col_3 += 1; total_col_3 += 1;
} }
} }
// Track if we've seen any order errors
if (checker1.has_error || checker2.has_error) && !input_error && !check_order {
input_error = true;
}
} }
if opts.get_flag(options::TOTAL) { if opts.get_flag(options::TOTAL) {
let line_ending = LineEnding::from_zero_flag(opts.get_flag(options::ZERO_TERMINATED)); let line_ending = LineEnding::from_zero_flag(opts.get_flag(options::ZERO_TERMINATED));
print!("{total_col_1}{delim}{total_col_2}{delim}{total_col_3}{delim}total{line_ending}"); print!("{total_col_1}{delim}{total_col_2}{delim}{total_col_3}{delim}total{line_ending}");
} }
if should_check_order && (checker1.has_error || checker2.has_error) {
// Print the input error message once at the end
if input_error {
eprintln!("comm: input is not in sorted order");
}
Err(USimpleError::new(1, ""))
} else {
Ok(())
}
} }
fn open_file(name: &str, line_ending: LineEnding) -> io::Result<LineReader> { fn open_file(name: &str, line_ending: LineEnding) -> io::Result<LineReader> {
@ -170,8 +307,8 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
"" => "\0", "" => "\0",
delim => delim, delim => delim,
}; };
comm(&mut f1, &mut f2, delim, &matches);
Ok(()) comm(&mut f1, &mut f2, delim, &matches)
} }
pub fn uu_app() -> Command { pub fn uu_app() -> Command {
@ -233,4 +370,17 @@ pub fn uu_app() -> Command {
.help("output a summary") .help("output a summary")
.action(ArgAction::SetTrue), .action(ArgAction::SetTrue),
) )
.arg(
Arg::new(options::CHECK_ORDER)
.long(options::CHECK_ORDER)
.help("check that the input is correctly sorted, even if all input lines are pairable")
.action(ArgAction::SetTrue),
)
.arg(
Arg::new(options::NO_CHECK_ORDER)
.long(options::NO_CHECK_ORDER)
.help("do not check that the input is correctly sorted")
.action(ArgAction::SetTrue)
.conflicts_with(options::CHECK_ORDER),
)
} }

View file

@ -352,11 +352,9 @@ fn defaultcheck_order() {
// * the first: if both files are not in order, the default behavior is the only // * the first: if both files are not in order, the default behavior is the only
// behavior that will provide an error message // behavior that will provide an error message
// * the second: if two rows are paired but are out of order, // * the second: if two rows are paired but are out of order,
// it won't matter if all rows in the two files are exactly the same. // it won't matter if all rows in the two files are exactly the same.
// This is specified in the documentation // This is specified in the documentation
#[test] #[test]
fn defaultcheck_order_identical_bad_order_files() { fn defaultcheck_order_identical_bad_order_files() {
let scene = TestScenario::new(util_name!()); let scene = TestScenario::new(util_name!());
@ -367,21 +365,13 @@ fn defaultcheck_order_identical_bad_order_files() {
.args(&["bad_order_1", "bad_order_1"]) .args(&["bad_order_1", "bad_order_1"])
.succeeds() .succeeds()
.stdout_is("\t\te\n\t\td\n\t\tb\n\t\ta\n"); .stdout_is("\t\te\n\t\td\n\t\tb\n\t\ta\n");
}
#[cfg_attr(not(feature = "test_unimplemented"), ignore)]
#[test]
fn defaultcheck_order_two_different_bad_order_files() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
at.write("bad_order_1", "e\nd\nb\na\n");
at.write("bad_order_2", "e\nc\nb\na\n");
scene scene
.ucmd() .ucmd()
.args(&["bad_order_1", "bad_order_2"]) .arg("--check-order")
.args(&["bad_order_1", "bad_order_1"])
.fails() .fails()
.stdout_is("\t\te\n\tc\n\tb\n\ta\nd\nb\na\n") .stdout_is("\t\te\n")
.stderr_is("error to be defined"); .stderr_is("comm: file 1 is not in sorted order\n");
} }
// * the third: (it is not know whether this is a bug or not) // * the third: (it is not know whether this is a bug or not)
@ -389,22 +379,20 @@ fn defaultcheck_order_two_different_bad_order_files() {
// where both lines are different and one or both file lines being // where both lines are different and one or both file lines being
// compared are out of order from the preceding line, // compared are out of order from the preceding line,
// it is ignored and no errors occur. // it is ignored and no errors occur.
// * the fourth: (it is not known whether this is a bug or not) // * the fourth: (it is not known whether this is a bug or not)
// there are additional, not-yet-understood circumstances where an out-of-order // there are additional, not-yet-understood circumstances where an out-of-order
// pair is ignored and is not counted against the 1 maximum out-of-order line. // pair is ignored and is not counted against the 1 maximum out-of-order line.
#[cfg_attr(not(feature = "test_unimplemented"), ignore)]
#[test] #[test]
fn unintuitive_default_behavior_1() { fn unintuitive_default_behavior_1() {
let scene = TestScenario::new(util_name!()); let scene = TestScenario::new(util_name!());
let at = &scene.fixtures; let at = &scene.fixtures;
at.write("defaultcheck_unintuitive_1", "m\nh\nn\no\nc\np\n"); at.write("defaultcheck_unintuitive_1", "m\nh\nn\no\nc\np\n");
at.write("defaultcheck_unintuitive_2", "m\nh\nn\no\np\n"); at.write("defaultcheck_unintuitive_2", "m\nh\nn\no\np\n");
// Here, GNU does not fail, but uutils does
scene scene
.ucmd() .ucmd()
.args(&["defaultcheck_unintuitive_1", "defaultcheck_unintuitive_2"]) .args(&["defaultcheck_unintuitive_1", "defaultcheck_unintuitive_2"])
.succeeds() .fails()
.stdout_is("\t\tm\n\t\th\n\t\tn\n\t\to\nc\n\t\tp\n"); .stdout_is("\t\tm\n\t\th\n\t\tn\n\t\to\nc\n\t\tp\n");
} }
@ -458,3 +446,132 @@ fn test_is_dir() {
.fails() .fails()
.stderr_only("comm: .: Is a directory\n"); .stderr_only("comm: .: Is a directory\n");
} }
#[test]
fn test_sorted() {
let expected_stderr =
"comm: file 2 is not in sorted order\ncomm: input is not in sorted order\n";
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
at.write("comm1", "1\n3");
at.write("comm2", "3\n2");
scene
.ucmd()
.args(&["comm1", "comm2"])
.fails()
.code_is(1)
.stdout_is("1\n\t\t3\n\t2\n")
.stderr_is(expected_stderr);
}
#[test]
fn test_sorted_check_order() {
let expected_stderr = "comm: file 2 is not in sorted order\n";
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
at.write("comm1", "1\n3");
at.write("comm2", "3\n2");
scene
.ucmd()
.arg("--check-order")
.args(&["comm1", "comm2"])
.fails()
.code_is(1)
.stdout_is("1\n\t\t3\n")
.stderr_is(expected_stderr);
}
#[test]
fn test_both_inputs_out_of_order() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
at.write("file_a", "3\n1\n0\n");
at.write("file_b", "3\n2\n0\n");
scene
.ucmd()
.args(&["file_a", "file_b"])
.fails()
.code_is(1)
.stdout_is("\t\t3\n1\n0\n\t2\n\t0\n")
.stderr_is(
"comm: file 1 is not in sorted order\n\
comm: file 2 is not in sorted order\n\
comm: input is not in sorted order\n",
);
}
#[test]
fn test_both_inputs_out_of_order_last_pair() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
at.write("file_a", "3\n1\n");
at.write("file_b", "3\n2\n");
scene
.ucmd()
.args(&["file_a", "file_b"])
.fails()
.code_is(1)
.stdout_is("\t\t3\n1\n\t2\n")
.stderr_is(
"comm: file 1 is not in sorted order\n\
comm: file 2 is not in sorted order\n\
comm: input is not in sorted order\n",
);
}
#[test]
fn test_first_input_out_of_order_extended() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
at.write("file_a", "0\n3\n1\n");
at.write("file_b", "2\n3\n");
scene
.ucmd()
.args(&["file_a", "file_b"])
.fails()
.code_is(1)
.stdout_is("0\n\t2\n\t\t3\n1\n")
.stderr_is(
"comm: file 1 is not in sorted order\n\
comm: input is not in sorted order\n",
);
}
#[test]
fn test_out_of_order_input_nocheck() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
// Create input files
at.write("file_a", "1\n3\n");
at.write("file_b", "3\n2\n");
scene
.ucmd()
.arg("--nocheck-order")
.args(&["file_a", "file_b"])
.succeeds()
.stdout_is("1\n\t\t3\n\t2\n")
.no_stderr();
}
#[test]
fn test_both_inputs_out_of_order_but_identical() {
let scene = TestScenario::new(util_name!());
let at = &scene.fixtures;
at.write("file_a", "2\n1\n0\n");
at.write("file_b", "2\n1\n0\n");
scene
.ucmd()
.args(&["file_a", "file_b"])
.succeeds()
.stdout_is("\t\t2\n\t\t1\n\t\t0\n")
.no_stderr();
}