From 17174ab9863dbcf50a9fd379cebd9dd40c451f9b Mon Sep 17 00:00:00 2001 From: Yury Zhytkou <54360928+zhitkoff@users.noreply.github.com> Date: Sun, 25 Feb 2024 03:45:37 -0500 Subject: [PATCH] `uniq`: pass remaining GNU tests (#5994) --- src/uu/uniq/src/uniq.rs | 490 ++++++++++++++++++------ src/uucore/src/lib/lib.rs | 1 + src/uucore/src/lib/mods.rs | 1 + src/uucore/src/lib/mods/posix.rs | 52 +++ tests/by-util/test_uniq.rs | 162 ++++++-- tests/fixtures/uniq/locale-fr-schar.txt | 2 + 6 files changed, 550 insertions(+), 158 deletions(-) create mode 100644 src/uucore/src/lib/mods/posix.rs create mode 100644 tests/fixtures/uniq/locale-fr-schar.txt diff --git a/src/uu/uniq/src/uniq.rs b/src/uu/uniq/src/uniq.rs index dd8c9f5b6..e074ebe42 100644 --- a/src/uu/uniq/src/uniq.rs +++ b/src/uu/uniq/src/uniq.rs @@ -2,14 +2,18 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. - -use clap::{builder::ValueParser, crate_version, Arg, ArgAction, ArgGroup, ArgMatches, Command}; +// spell-checker:ignore badoption +use clap::{ + builder::ValueParser, crate_version, error::ContextKind, error::Error, error::ErrorKind, Arg, + ArgAction, ArgMatches, Command, +}; use std::ffi::{OsStr, OsString}; use std::fs::File; -use std::io::{self, stdin, stdout, BufRead, BufReader, BufWriter, Write}; -use std::str::FromStr; +use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Write}; +use std::num::IntErrorKind; use uucore::display::Quotable; -use uucore::error::{FromIo, UResult, USimpleError, UUsageError}; +use uucore::error::{FromIo, UError, UResult, USimpleError}; +use uucore::posix::{posix_version, OBSOLETE}; use uucore::{format_usage, help_about, help_section, help_usage}; const ABOUT: &str = help_about!("uniq.md"); @@ -23,7 +27,6 @@ pub mod options { pub static IGNORE_CASE: &str = "ignore-case"; pub static REPEATED: &str = "repeated"; pub static SKIP_FIELDS: &str = "skip-fields"; - pub static OBSOLETE_SKIP_FIELDS: &str = "obsolete_skip_field"; pub static SKIP_CHARS: &str = "skip-chars"; pub static UNIQUE: &str = "unique"; pub static ZERO_TERMINATED: &str = "zero-terminated"; @@ -54,8 +57,6 @@ struct Uniq { zero_terminated: bool, } -const OBSOLETE_SKIP_FIELDS_DIGITS: [&str; 10] = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]; - macro_rules! write_line_terminator { ($writer:expr, $line_terminator:expr) => { $writer @@ -69,7 +70,7 @@ impl Uniq { let mut first_line_printed = false; let mut group_count = 1; let line_terminator = self.get_line_terminator(); - let mut lines = reader.split(line_terminator).map(get_line_string); + let mut lines = reader.split(line_terminator); let mut line = match lines.next() { Some(l) => l?, None => return Ok(()), @@ -111,22 +112,28 @@ impl Uniq { Ok(()) } - fn skip_fields<'a>(&self, line: &'a str) -> &'a str { + fn skip_fields(&self, line: &[u8]) -> Vec { if let Some(skip_fields) = self.skip_fields { - let mut i = 0; - let mut char_indices = line.char_indices(); + let mut line = line.iter(); + let mut line_after_skipped_field: Vec; for _ in 0..skip_fields { - if char_indices.all(|(_, c)| c.is_whitespace()) { - return ""; + if line.all(|u| u.is_ascii_whitespace()) { + return Vec::new(); } - match char_indices.find(|(_, c)| c.is_whitespace()) { - None => return "", - Some((next_field_i, _)) => i = next_field_i, + line_after_skipped_field = line + .by_ref() + .skip_while(|u| !u.is_ascii_whitespace()) + .copied() + .collect::>(); + + if line_after_skipped_field.is_empty() { + return Vec::new(); } + line = line_after_skipped_field.iter(); } - &line[i..] + line.copied().collect::>() } else { - line + line.to_vec() } } @@ -138,15 +145,15 @@ impl Uniq { } } - fn cmp_keys(&self, first: &str, second: &str) -> bool { + fn cmp_keys(&self, first: &[u8], second: &[u8]) -> bool { self.cmp_key(first, |first_iter| { self.cmp_key(second, |second_iter| first_iter.ne(second_iter)) }) } - fn cmp_key(&self, line: &str, mut closure: F) -> bool + fn cmp_key(&self, line: &[u8], mut closure: F) -> bool where - F: FnMut(&mut dyn Iterator) -> bool, + F: FnMut(&mut dyn Iterator) -> bool, { let fields_to_check = self.skip_fields(line); let len = fields_to_check.len(); @@ -155,28 +162,34 @@ impl Uniq { if len > 0 { // fast path: avoid doing any work if there is no need to skip or map to lower-case if !self.ignore_case && slice_start == 0 && slice_stop == len { - return closure(&mut fields_to_check.chars()); + return closure(&mut fields_to_check.iter().copied()); } // fast path: avoid skipping if self.ignore_case && slice_start == 0 && slice_stop == len { - return closure(&mut fields_to_check.chars().flat_map(char::to_uppercase)); + return closure(&mut fields_to_check.iter().map(|u| u.to_ascii_lowercase())); } - // fast path: we can avoid mapping chars to upper-case, if we don't want to ignore the case + // fast path: we can avoid mapping chars to lower-case, if we don't want to ignore the case if !self.ignore_case { - return closure(&mut fields_to_check.chars().skip(slice_start).take(slice_stop)); + return closure( + &mut fields_to_check + .iter() + .skip(slice_start) + .take(slice_stop) + .copied(), + ); } closure( &mut fields_to_check - .chars() + .iter() .skip(slice_start) .take(slice_stop) - .flat_map(char::to_uppercase), + .map(|u| u.to_ascii_lowercase()), ) } else { - closure(&mut fields_to_check.chars()) + closure(&mut fields_to_check.iter().copied()) } } @@ -196,7 +209,7 @@ impl Uniq { fn print_line( &self, writer: &mut impl Write, - line: &str, + line: &[u8], count: usize, first_line_printed: bool, ) -> UResult<()> { @@ -207,9 +220,16 @@ impl Uniq { } if self.show_counts { - write!(writer, "{count:7} {line}") + let prefix = format!("{count:7} "); + let out = prefix + .as_bytes() + .iter() + .chain(line.iter()) + .copied() + .collect::>(); + writer.write_all(out.as_slice()) } else { - writer.write_all(line.as_bytes()) + writer.write_all(line) } .map_err_context(|| "Failed to write line".to_string())?; @@ -217,66 +237,328 @@ impl Uniq { } } -fn get_line_string(io_line: io::Result>) -> UResult { - let line_bytes = io_line.map_err_context(|| "failed to split lines".to_string())?; - String::from_utf8(line_bytes) - .map_err(|e| USimpleError::new(1, format!("failed to convert line to utf8: {e}"))) +fn opt_parsed(opt_name: &str, matches: &ArgMatches) -> UResult> { + match matches.get_one::(opt_name) { + Some(arg_str) => match arg_str.parse::() { + Ok(v) => Ok(Some(v)), + Err(e) => match e.kind() { + IntErrorKind::PosOverflow => Ok(Some(usize::MAX)), + _ => Err(USimpleError::new( + 1, + format!( + "Invalid argument for {}: {}", + opt_name, + arg_str.maybe_quote() + ), + )), + }, + }, + None => Ok(None), + } } -fn opt_parsed(opt_name: &str, matches: &ArgMatches) -> UResult> { - Ok(match matches.get_one::(opt_name) { - Some(arg_str) => Some(arg_str.parse().map_err(|_| { - USimpleError::new( - 1, - format!( - "Invalid argument for {}: {}", - opt_name, - arg_str.maybe_quote() - ), - ) - })?), - None => None, - }) -} - -/// Gets number of fields to be skipped from the shorthand option `-N` +/// Extract obsolete shorthands (if any) for skip fields and skip chars options +/// following GNU `uniq` behavior /// -/// ```bash -/// uniq -12345 -/// ``` -/// the first digit isn't interpreted by clap as part of the value -/// so `get_one()` would return `2345`, then to get the actual value -/// we loop over every possible first digit, only one of which can be -/// found in the command line because they conflict with each other, -/// append the value to it and parse the resulting string as usize, -/// an error at this point means that a character that isn't a digit was given -fn obsolete_skip_field(matches: &ArgMatches) -> UResult> { - for opt_text in OBSOLETE_SKIP_FIELDS_DIGITS { - let argument = matches.get_one::(opt_text); - if matches.contains_id(opt_text) { - let mut full = opt_text.to_owned(); - if let Some(ar) = argument { - full.push_str(ar); - } - let value = full.parse::(); +/// Examples for obsolete skip fields option +/// `uniq -1 file` would equal `uniq -f1 file` +/// `uniq -1 -2 -3 file` would equal `uniq -f123 file` +/// `uniq -1 -2 -f5 file` would equal `uniq -f5 file` +/// `uniq -u20s4 file` would equal `uniq -u -f20 -s4 file` +/// `uniq -D1w3 -3 file` would equal `uniq -D -f3 -w3 file` +/// +/// Examples for obsolete skip chars option +/// `uniq +1 file` would equal `uniq -s1 file` +/// `uniq +1 -s2 file` would equal `uniq -s2 file` +/// `uniq -s2 +3 file` would equal `uniq -s3 file` +/// +fn handle_obsolete(args: impl uucore::Args) -> (Vec, Option, Option) { + let mut skip_fields_old = None; + let mut skip_chars_old = None; + let mut preceding_long_opt_req_value = false; + let mut preceding_short_opt_req_value = false; - if let Ok(val) = value { - return Ok(Some(val)); - } else { - return Err(USimpleError { - code: 1, - message: format!("Invalid argument for skip-fields: {}", full), - } - .into()); + let filtered_args = args + .filter_map(|os_slice| { + filter_args( + os_slice, + &mut skip_fields_old, + &mut skip_chars_old, + &mut preceding_long_opt_req_value, + &mut preceding_short_opt_req_value, + ) + }) + .collect(); + + // exacted String values (if any) for skip_fields_old and skip_chars_old + // are guaranteed to consist of ascii digit chars only at this point + // so, it is safe to parse into usize and collapse Result into Option + let skip_fields_old: Option = skip_fields_old.and_then(|v| v.parse::().ok()); + let skip_chars_old: Option = skip_chars_old.and_then(|v| v.parse::().ok()); + + (filtered_args, skip_fields_old, skip_chars_old) +} + +fn filter_args( + os_slice: OsString, + skip_fields_old: &mut Option, + skip_chars_old: &mut Option, + preceding_long_opt_req_value: &mut bool, + preceding_short_opt_req_value: &mut bool, +) -> Option { + let filter: Option; + if let Some(slice) = os_slice.to_str() { + if should_extract_obs_skip_fields( + slice, + preceding_long_opt_req_value, + preceding_short_opt_req_value, + ) { + // start of the short option string + // that can have obsolete skip fields option value in it + filter = handle_extract_obs_skip_fields(slice, skip_fields_old); + } else if should_extract_obs_skip_chars( + slice, + preceding_long_opt_req_value, + preceding_short_opt_req_value, + ) { + // the obsolete skip chars option + filter = handle_extract_obs_skip_chars(slice, skip_chars_old); + } else { + // either not a short option + // or a short option that cannot have obsolete lines value in it + filter = Some(OsString::from(slice)); + // Check and reset to None obsolete values extracted so far + // if corresponding new/documented options are encountered next. + // NOTE: For skip fields - occurrences of corresponding new/documented options + // inside combined short options ike '-u20s4' or '-D1w3', etc + // are also covered in `handle_extract_obs_skip_fields()` function + if slice.starts_with("-f") { + *skip_fields_old = None; + } + if slice.starts_with("-s") { + *skip_chars_old = None; } } + handle_preceding_options( + slice, + preceding_long_opt_req_value, + preceding_short_opt_req_value, + ); + } else { + // Cannot cleanly convert os_slice to UTF-8 + // Do not process and return as-is + // This will cause failure later on, but we should not handle it here + // and let clap panic on invalid UTF-8 argument + filter = Some(os_slice); } - Ok(None) + filter +} + +/// Helper function to [`filter_args`] +/// Checks if the slice is a true short option (and not hyphen prefixed value of an option) +/// and if so, a short option that can contain obsolete skip fields value +fn should_extract_obs_skip_fields( + slice: &str, + preceding_long_opt_req_value: &bool, + preceding_short_opt_req_value: &bool, +) -> bool { + slice.starts_with('-') + && !slice.starts_with("--") + && !preceding_long_opt_req_value + && !preceding_short_opt_req_value + && !slice.starts_with("-s") + && !slice.starts_with("-f") + && !slice.starts_with("-w") +} + +/// Helper function to [`filter_args`] +/// Checks if the slice is a true obsolete skip chars short option +fn should_extract_obs_skip_chars( + slice: &str, + preceding_long_opt_req_value: &bool, + preceding_short_opt_req_value: &bool, +) -> bool { + slice.starts_with('+') + && posix_version().is_some_and(|v| v <= OBSOLETE) + && !preceding_long_opt_req_value + && !preceding_short_opt_req_value + && slice.chars().nth(1).map_or(false, |c| c.is_ascii_digit()) +} + +/// Helper function to [`filter_args`] +/// Captures if current slice is a preceding option +/// that requires value +fn handle_preceding_options( + slice: &str, + preceding_long_opt_req_value: &mut bool, + preceding_short_opt_req_value: &mut bool, +) { + // capture if current slice is a preceding long option that requires value and does not use '=' to assign that value + // following slice should be treaded as value for this option + // even if it starts with '-' (which would be treated as hyphen prefixed value) + if slice.starts_with("--") { + use options as O; + *preceding_long_opt_req_value = &slice[2..] == O::SKIP_CHARS + || &slice[2..] == O::SKIP_FIELDS + || &slice[2..] == O::CHECK_CHARS + || &slice[2..] == O::GROUP + || &slice[2..] == O::ALL_REPEATED; + } + // capture if current slice is a preceding short option that requires value and does not have value in the same slice (value separated by whitespace) + // following slice should be treaded as value for this option + // even if it starts with '-' (which would be treated as hyphen prefixed value) + *preceding_short_opt_req_value = slice == "-s" || slice == "-f" || slice == "-w"; + // slice is a value + // reset preceding option flags + if !slice.starts_with('-') { + *preceding_short_opt_req_value = false; + *preceding_long_opt_req_value = false; + } +} + +/// Helper function to [`filter_args`] +/// Extracts obsolete skip fields numeric part from argument slice +/// and filters it out +fn handle_extract_obs_skip_fields( + slice: &str, + skip_fields_old: &mut Option, +) -> Option { + let mut obs_extracted: Vec = vec![]; + let mut obs_end_reached = false; + let mut obs_overwritten_by_new = false; + let filtered_slice: Vec = slice + .chars() + .filter(|c| { + if c.eq(&'f') { + // any extracted obsolete skip fields value up to this point should be discarded + // as the new/documented option for skip fields was used after it + // i.e. in situation like `-u12f3` + // The obsolete skip fields value should still be extracted, filtered out + // but the skip_fields_old should be set to None instead of Some(String) later on + obs_overwritten_by_new = true; + } + // To correctly process scenario like '-u20s4' or '-D1w3', etc + // we need to stop extracting digits once alphabetic character is encountered + // after we already have something in obs_extracted + if c.is_ascii_digit() && !obs_end_reached { + obs_extracted.push(*c); + false + } else { + if !obs_extracted.is_empty() { + obs_end_reached = true; + } + true + } + }) + .collect(); + + if obs_extracted.is_empty() { + // no obsolete value found/extracted + Some(OsString::from(slice)) + } else { + // obsolete value was extracted + // unless there was new/documented option for skip fields used after it + // set the skip_fields_old value (concatenate to it if there was a value there already) + if obs_overwritten_by_new { + *skip_fields_old = None; + } else { + let mut extracted: String = obs_extracted.iter().collect(); + if let Some(val) = skip_fields_old { + extracted.push_str(val); + } + *skip_fields_old = Some(extracted); + } + if filtered_slice.get(1).is_some() { + // there were some short options in front of or after obsolete lines value + // i.e. '-u20s4' or '-D1w3' or similar, which after extraction of obsolete lines value + // would look like '-us4' or '-Dw3' or similar + let filtered_slice: String = filtered_slice.iter().collect(); + Some(OsString::from(filtered_slice)) + } else { + None + } + } +} + +/// Helper function to [`filter_args`] +/// Extracts obsolete skip chars numeric part from argument slice +fn handle_extract_obs_skip_chars( + slice: &str, + skip_chars_old: &mut Option, +) -> Option { + let mut obs_extracted: Vec = vec![]; + let mut slice_chars = slice.chars(); + slice_chars.next(); // drop leading '+' character + for c in slice_chars { + if c.is_ascii_digit() { + obs_extracted.push(c); + } else { + // for obsolete skip chars option the whole value after '+' should be numeric + // so, if any non-digit characters are encountered in the slice (i.e. `+1q`, etc) + // set skip_chars_old to None and return whole slice back. + // It will be parsed by clap and panic with appropriate error message + *skip_chars_old = None; + return Some(OsString::from(slice)); + } + } + if obs_extracted.is_empty() { + // no obsolete value found/extracted + // i.e. it was just '+' character alone + Some(OsString::from(slice)) + } else { + // successfully extracted numeric value + // capture it and return None to filter out the whole slice + *skip_chars_old = Some(obs_extracted.iter().collect()); + None + } +} + +/// Maps Clap errors to USimpleError and overrides 3 specific ones +/// to meet requirements of GNU tests for `uniq`. +/// Unfortunately these overrides are necessary, since several GNU tests +/// for `uniq` hardcode and require the exact wording of the error message +/// and it is not compatible with how Clap formats and displays those error messages. +fn map_clap_errors(clap_error: &Error) -> Box { + let footer = "Try 'uniq --help' for more information."; + let override_arg_conflict = + "--group is mutually exclusive with -c/-d/-D/-u\n".to_string() + footer; + let override_group_badoption = "invalid argument 'badoption' for '--group'\nValid arguments are:\n - 'prepend'\n - 'append'\n - 'separate'\n - 'both'\n".to_string() + footer; + let override_all_repeated_badoption = "invalid argument 'badoption' for '--all-repeated'\nValid arguments are:\n - 'none'\n - 'prepend'\n - 'separate'\n".to_string() + footer; + + let error_message = match clap_error.kind() { + ErrorKind::ArgumentConflict => override_arg_conflict, + ErrorKind::InvalidValue + if clap_error + .get(ContextKind::InvalidValue) + .is_some_and(|v| v.to_string() == "badoption") + && clap_error + .get(ContextKind::InvalidArg) + .is_some_and(|v| v.to_string().starts_with("--group")) => + { + override_group_badoption + } + ErrorKind::InvalidValue + if clap_error + .get(ContextKind::InvalidValue) + .is_some_and(|v| v.to_string() == "badoption") + && clap_error + .get(ContextKind::InvalidArg) + .is_some_and(|v| v.to_string().starts_with("--all-repeated")) => + { + override_all_repeated_badoption + } + _ => clap_error.to_string(), + }; + USimpleError::new(1, error_message) } #[uucore::main] pub fn uumain(args: impl uucore::Args) -> UResult<()> { - let matches = uu_app().after_help(AFTER_HELP).try_get_matches_from(args)?; + let (args, skip_fields_old, skip_chars_old) = handle_obsolete(args); + + let matches = uu_app() + .try_get_matches_from(args) + .map_err(|e| map_clap_errors(&e))?; let files = matches.get_many::(ARG_FILES); @@ -286,8 +568,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { .unwrap_or_default(); let skip_fields_modern: Option = opt_parsed(options::SKIP_FIELDS, &matches)?; - - let skip_fields_old: Option = obsolete_skip_field(&matches)?; + let skip_chars_modern: Option = opt_parsed(options::SKIP_CHARS, &matches)?; let uniq = Uniq { repeats_only: matches.get_flag(options::REPEATED) @@ -298,16 +579,16 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { delimiters: get_delimiter(&matches), show_counts: matches.get_flag(options::COUNT), skip_fields: skip_fields_modern.or(skip_fields_old), - slice_start: opt_parsed(options::SKIP_CHARS, &matches)?, + slice_start: skip_chars_modern.or(skip_chars_old), slice_stop: opt_parsed(options::CHECK_CHARS, &matches)?, ignore_case: matches.get_flag(options::IGNORE_CASE), zero_terminated: matches.get_flag(options::ZERO_TERMINATED), }; if uniq.show_counts && uniq.all_repeated { - return Err(UUsageError::new( + return Err(USimpleError::new( 1, - "printing all duplicated lines and repeat counts is meaningless", + "printing all duplicated lines and repeat counts is meaningless\nTry 'uniq --help' for more information.", )); } @@ -318,11 +599,12 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { } pub fn uu_app() -> Command { - let mut cmd = Command::new(uucore::util_name()) + Command::new(uucore::util_name()) .version(crate_version!()) .about(ABOUT) .override_usage(format_usage(USAGE)) .infer_long_args(true) + .after_help(AFTER_HELP) .arg( Arg::new(options::ALL_REPEATED) .short('D') @@ -356,6 +638,7 @@ pub fn uu_app() -> Command { options::REPEATED, options::ALL_REPEATED, options::UNIQUE, + options::COUNT ]), ) .arg( @@ -397,7 +680,6 @@ pub fn uu_app() -> Command { Arg::new(options::SKIP_FIELDS) .short('f') .long(options::SKIP_FIELDS) - .overrides_with_all(OBSOLETE_SKIP_FIELDS_DIGITS) .help("avoid comparing the first N fields") .value_name("N"), ) @@ -415,42 +697,14 @@ pub fn uu_app() -> Command { .help("end lines with 0 byte, not newline") .action(ArgAction::SetTrue), ) - .group( - // in GNU `uniq` every every digit of these arguments - // would be interpreted as a simple flag, - // these flags then are concatenated to get - // the number of fields to skip. - // in this way `uniq -1 -z -2` would be - // equal to `uniq -12 -q`, since this behavior - // is counterintuitive and it's hard to do in clap - // we handle it more like GNU `fold`: we have a flag - // for each possible initial digit, that takes the - // rest of the value as argument. - // we disallow explicitly multiple occurrences - // because then it would have a different behavior - // from GNU - ArgGroup::new(options::OBSOLETE_SKIP_FIELDS) - .multiple(false) - .args(OBSOLETE_SKIP_FIELDS_DIGITS) - ) .arg( Arg::new(ARG_FILES) .action(ArgAction::Append) .value_parser(ValueParser::os_string()) .num_args(0..=2) + .hide(true) .value_hint(clap::ValueHint::FilePath), - ); - - for i in OBSOLETE_SKIP_FIELDS_DIGITS { - cmd = cmd.arg( - Arg::new(i) - .short(i.chars().next().unwrap()) - .num_args(0..=1) - .hide(true), - ); - } - - cmd + ) } fn get_delimiter(matches: &ArgMatches) -> Delimiters { diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs index 6f8400589..9557dcc76 100644 --- a/src/uucore/src/lib/lib.rs +++ b/src/uucore/src/lib/lib.rs @@ -25,6 +25,7 @@ pub use crate::mods::error; pub use crate::mods::line_ending; pub use crate::mods::os; pub use crate::mods::panic; +pub use crate::mods::posix; // * string parsing modules pub use crate::parser::parse_glob; diff --git a/src/uucore/src/lib/mods.rs b/src/uucore/src/lib/mods.rs index 986536d6d..40b5046f2 100644 --- a/src/uucore/src/lib/mods.rs +++ b/src/uucore/src/lib/mods.rs @@ -9,3 +9,4 @@ pub mod error; pub mod line_ending; pub mod os; pub mod panic; +pub mod posix; diff --git a/src/uucore/src/lib/mods/posix.rs b/src/uucore/src/lib/mods/posix.rs new file mode 100644 index 000000000..662880f84 --- /dev/null +++ b/src/uucore/src/lib/mods/posix.rs @@ -0,0 +1,52 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. +// spell-checker:ignore (vars) +//! Iterate over lines, including the line ending character(s). +//! +//! This module provides the [`posix_version`] function, that returns +//! Some(usize) if the `_POSIX2_VERSION` environment variable is defined +//! and has value that can be parsed. +//! Otherwise returns None, so the calling utility would assume default behavior. +//! +//! NOTE: GNU (as of v9.4) recognizes three distinct values for POSIX version: +//! '199209' for POSIX 1003.2-1992, which would define Obsolete mode +//! '200112' for POSIX 1003.1-2001, which is the minimum version for Traditional mode +//! '200809' for POSIX 1003.1-2008, which is the minimum version for Modern mode +//! +//! Utilities that rely on this module: +//! `sort` (TBD) +//! `tail` (TBD) +//! `touch` (TBD) +//! `uniq` +//! +use std::env; + +pub const OBSOLETE: usize = 199209; +pub const TRADITIONAL: usize = 200112; +pub const MODERN: usize = 200809; + +pub fn posix_version() -> Option { + env::var("_POSIX2_VERSION") + .ok() + .and_then(|v| v.parse::().ok()) +} + +#[cfg(test)] +mod tests { + use crate::posix::*; + + #[test] + fn test_posix_version() { + // default + assert_eq!(posix_version(), None); + // set specific version + env::set_var("_POSIX2_VERSION", OBSOLETE.to_string()); + assert_eq!(posix_version(), Some(OBSOLETE)); + env::set_var("_POSIX2_VERSION", TRADITIONAL.to_string()); + assert_eq!(posix_version(), Some(TRADITIONAL)); + env::set_var("_POSIX2_VERSION", MODERN.to_string()); + assert_eq!(posix_version(), Some(MODERN)); + } +} diff --git a/tests/by-util/test_uniq.rs b/tests/by-util/test_uniq.rs index aa41de827..7ebc5c482 100644 --- a/tests/by-util/test_uniq.rs +++ b/tests/by-util/test_uniq.rs @@ -2,10 +2,10 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -use std::io::Write; -// spell-checker:ignore nabcd +// spell-checker:ignore nabcd badoption schar use crate::common::util::TestScenario; +use uucore::posix::OBSOLETE; static INPUT: &str = "sorted.txt"; static OUTPUT: &str = "sorted-output.txt"; @@ -118,10 +118,10 @@ fn test_stdin_skip_21_fields_obsolete() { #[test] fn test_stdin_skip_invalid_fields_obsolete() { new_ucmd!() - .args(&["-5deadbeef"]) + .args(&["-5q"]) .run() .failure() - .stderr_only("uniq: Invalid argument for skip-fields: 5deadbeef\n"); + .stderr_contains("error: unexpected argument '-q' found\n"); } #[test] @@ -138,8 +138,7 @@ fn test_all_repeated_followed_by_filename() { let filename = "test.txt"; let (at, mut ucmd) = at_and_ucmd!(); - let mut file = at.make_file(filename); - file.write_all(b"a\na\n").unwrap(); + at.write(filename, "a\na\n"); ucmd.args(&["--all-repeated", filename]) .run() @@ -202,14 +201,13 @@ fn test_stdin_zero_terminated() { } #[test] -fn test_invalid_utf8() { +fn test_gnu_locale_fr_schar() { new_ucmd!() - .arg("not-utf8-sequence.txt") + .args(&["-f1", "locale-fr-schar.txt"]) + .env("LC_ALL", "C") .run() - .failure() - .stderr_only( - "uniq: failed to convert line to utf8: invalid utf-8 sequence of 1 bytes from index 0\n", - ); + .success() + .stdout_is_fixture_bytes("locale-fr-schar.txt"); } #[test] @@ -226,8 +224,7 @@ fn test_group_followed_by_filename() { let filename = "test.txt"; let (at, mut ucmd) = at_and_ucmd!(); - let mut file = at.make_file(filename); - file.write_all(b"a\na\n").unwrap(); + at.write(filename, "a\na\n"); ucmd.args(&["--group", filename]) .run() @@ -521,23 +518,23 @@ fn gnu_tests() { stderr: None, exit: None, }, - // // Obsolete syntax for "-s 1" - // TestCase { - // name: "obs-plus40", - // args: &["+1"], - // input: "aaa\naaa\n", - // stdout: Some("aaa\n"), - // stderr: None, - // exit: None, - // }, - // TestCase { - // name: "obs-plus41", - // args: &["+1"], - // input: "baa\naaa\n", - // stdout: Some("baa\n"), - // stderr: None, - // exit: None, - // }, + // Obsolete syntax for "-s 1" + TestCase { + name: "obs-plus40", + args: &["+1"], + input: "aaa\naaa\n", + stdout: Some("aaa\n"), + stderr: None, + exit: None, + }, + TestCase { + name: "obs-plus41", + args: &["+1"], + input: "baa\naaa\n", + stdout: Some("baa\n"), + stderr: None, + exit: None, + }, TestCase { name: "42", args: &["-s", "1"], @@ -554,7 +551,6 @@ fn gnu_tests() { stderr: None, exit: None, }, - /* // Obsolete syntax for "-s 1" TestCase { name: "obs-plus44", @@ -572,7 +568,6 @@ fn gnu_tests() { stderr: None, exit: None, }, - */ TestCase { name: "50", args: &["-f", "1", "-s", "1"], @@ -757,17 +752,14 @@ fn gnu_tests() { stderr: None, exit: None, }, - /* - Disable as it fails too often. See: - https://github.com/uutils/coreutils/issues/3509 TestCase { name: "112", args: &["-D", "-c"], input: "a a\na b\n", stdout: Some(""), - stderr: Some("uniq: printing all duplicated lines and repeat counts is meaningless"), + stderr: Some("uniq: printing all duplicated lines and repeat counts is meaningless\nTry 'uniq --help' for more information.\n"), exit: Some(1), - },*/ + }, TestCase { name: "113", args: &["--all-repeated=separate"], @@ -816,6 +808,14 @@ fn gnu_tests() { stderr: None, exit: None, }, + TestCase { + name: "119", + args: &["--all-repeated=badoption"], + input: "a a\na b\n", + stdout: Some(""), + stderr: Some("uniq: invalid argument 'badoption' for '--all-repeated'\nValid arguments are:\n - 'none'\n - 'prepend'\n - 'separate'\nTry 'uniq --help' for more information.\n"), + exit: Some(1), + }, // \x08 is the backspace char TestCase { name: "120", @@ -825,6 +825,16 @@ fn gnu_tests() { stderr: None, exit: None, }, + // u128::MAX = 340282366920938463463374607431768211455 + TestCase { + name: "121", + args: &["-d", "-u", "-w340282366920938463463374607431768211456"], + input: "a\na\n\x08", + stdout: Some(""), + stderr: None, + exit: None, + }, + // Test 122 is the same as 121, just different big int overflow number TestCase { name: "123", args: &["--zero-terminated"], @@ -969,16 +979,88 @@ fn gnu_tests() { stderr: None, exit: None, }, + TestCase { + name: "141", + args: &["--group", "-c"], + input: "", + stdout: Some(""), + stderr: Some("uniq: --group is mutually exclusive with -c/-d/-D/-u\nTry 'uniq --help' for more information.\n"), + exit: Some(1), + }, + TestCase { + name: "142", + args: &["--group", "-d"], + input: "", + stdout: Some(""), + stderr: Some("uniq: --group is mutually exclusive with -c/-d/-D/-u\nTry 'uniq --help' for more information.\n"), + exit: Some(1), + }, + TestCase { + name: "143", + args: &["--group", "-u"], + input: "", + stdout: Some(""), + stderr: Some("uniq: --group is mutually exclusive with -c/-d/-D/-u\nTry 'uniq --help' for more information.\n"), + exit: Some(1), + }, + TestCase { + name: "144", + args: &["--group", "-D"], + input: "", + stdout: Some(""), + stderr: Some("uniq: --group is mutually exclusive with -c/-d/-D/-u\nTry 'uniq --help' for more information.\n"), + exit: Some(1), + }, + TestCase { + name: "145", + args: &["--group=badoption"], + input: "", + stdout: Some(""), + stderr: Some("uniq: invalid argument 'badoption' for '--group'\nValid arguments are:\n - 'prepend'\n - 'append'\n - 'separate'\n - 'both'\nTry 'uniq --help' for more information.\n"), + exit: Some(1), + }, ]; + // run regular version of tests with regular file as input for case in cases { + // prep input file + let (at, mut ucmd) = at_and_ucmd!(); + at.write("input-file", case.input); + + // first - run a version of tests with regular file as input eprintln!("Test {}", case.name); - let result = new_ucmd!().args(case.args).run_piped_stdin(case.input); + // set environment variable for obsolete skip char option tests + if case.name.starts_with("obs-plus") { + ucmd.env("_POSIX2_VERSION", OBSOLETE.to_string()); + } + let result = ucmd.args(case.args).arg("input-file").run(); if let Some(stdout) = case.stdout { result.stdout_is(stdout); } if let Some(stderr) = case.stderr { - result.stderr_contains(stderr); + result.stderr_is(stderr); + } + if let Some(exit) = case.exit { + result.code_is(exit); + } + + // then - ".stdin" version of tests with input piped in + // NOTE: GNU has another variant for stdin redirect from a file + // as in `uniq < input-file` + // For now we treat it as equivalent of piped in stdin variant + // as in `cat input-file | uniq` + eprintln!("Test {}.stdin", case.name); + // set environment variable for obsolete skip char option tests + let mut ucmd = new_ucmd!(); + if case.name.starts_with("obs-plus") { + ucmd.env("_POSIX2_VERSION", OBSOLETE.to_string()); + } + let result = ucmd.args(case.args).run_piped_stdin(case.input); + if let Some(stdout) = case.stdout { + result.stdout_is(stdout); + } + if let Some(stderr) = case.stderr { + result.stderr_is(stderr); } if let Some(exit) = case.exit { result.code_is(exit); diff --git a/tests/fixtures/uniq/locale-fr-schar.txt b/tests/fixtures/uniq/locale-fr-schar.txt new file mode 100644 index 000000000..4e285f37a --- /dev/null +++ b/tests/fixtures/uniq/locale-fr-schar.txt @@ -0,0 +1,2 @@ + y z +  y z