From 17174ab9863dbcf50a9fd379cebd9dd40c451f9b Mon Sep 17 00:00:00 2001
From: Yury Zhytkou <54360928+zhitkoff@users.noreply.github.com>
Date: Sun, 25 Feb 2024 03:45:37 -0500
Subject: [PATCH] `uniq`: pass remaining GNU tests (#5994)

---
 src/uu/uniq/src/uniq.rs                 | 490 ++++++++++++++++++------
 src/uucore/src/lib/lib.rs               |   1 +
 src/uucore/src/lib/mods.rs              |   1 +
 src/uucore/src/lib/mods/posix.rs        |  52 +++
 tests/by-util/test_uniq.rs              | 162 ++++++--
 tests/fixtures/uniq/locale-fr-schar.txt |   2 +
 6 files changed, 550 insertions(+), 158 deletions(-)
 create mode 100644 src/uucore/src/lib/mods/posix.rs
 create mode 100644 tests/fixtures/uniq/locale-fr-schar.txt
diff --git a/src/uu/uniq/src/uniq.rs b/src/uu/uniq/src/uniq.rs
index dd8c9f5b6..e074ebe42 100644
--- a/src/uu/uniq/src/uniq.rs
+++ b/src/uu/uniq/src/uniq.rs
@@ -2,14 +2,18 @@
 //
 // For the full copyright and license information, please view the LICENSE
 // file that was distributed with this source code.
-
-use clap::{builder::ValueParser, crate_version, Arg, ArgAction, ArgGroup, ArgMatches, Command};
+// spell-checker:ignore badoption
+use clap::{
+    builder::ValueParser, crate_version, error::ContextKind, error::Error, error::ErrorKind, Arg,
+    ArgAction, ArgMatches, Command,
+};
 use std::ffi::{OsStr, OsString};
 use std::fs::File;
-use std::io::{self, stdin, stdout, BufRead, BufReader, BufWriter, Write};
-use std::str::FromStr;
+use std::io::{stdin, stdout, BufRead, BufReader, BufWriter, Write};
+use std::num::IntErrorKind;
 use uucore::display::Quotable;
-use uucore::error::{FromIo, UResult, USimpleError, UUsageError};
+use uucore::error::{FromIo, UError, UResult, USimpleError};
+use uucore::posix::{posix_version, OBSOLETE};
 use uucore::{format_usage, help_about, help_section, help_usage};
 
 const ABOUT: &str = help_about!("uniq.md");
@@ -23,7 +27,6 @@ pub mod options {
     pub static IGNORE_CASE: &str = "ignore-case";
     pub static REPEATED: &str = "repeated";
     pub static SKIP_FIELDS: &str = "skip-fields";
-    pub static OBSOLETE_SKIP_FIELDS: &str = "obsolete_skip_field";
     pub static SKIP_CHARS: &str = "skip-chars";
     pub static UNIQUE: &str = "unique";
     pub static ZERO_TERMINATED: &str = "zero-terminated";
@@ -54,8 +57,6 @@ struct Uniq {
     zero_terminated: bool,
 }
 
-const OBSOLETE_SKIP_FIELDS_DIGITS: [&str; 10] = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"];
-
 macro_rules! write_line_terminator {
     ($writer:expr, $line_terminator:expr) => {
         $writer
@@ -69,7 +70,7 @@ impl Uniq {
         let mut first_line_printed = false;
         let mut group_count = 1;
         let line_terminator = self.get_line_terminator();
-        let mut lines = reader.split(line_terminator).map(get_line_string);
+        let mut lines = reader.split(line_terminator);
         let mut line = match lines.next() {
             Some(l) => l?,
             None => return Ok(()),
@@ -111,22 +112,28 @@ impl Uniq {
         Ok(())
     }
 
-    fn skip_fields<'a>(&self, line: &'a str) -> &'a str {
+    fn skip_fields(&self, line: &[u8]) -> Vec<u8> {
         if let Some(skip_fields) = self.skip_fields {
-            let mut i = 0;
-            let mut char_indices = line.char_indices();
+            let mut line = line.iter();
+            let mut line_after_skipped_field: Vec<u8>;
             for _ in 0..skip_fields {
-                if char_indices.all(|(_, c)| c.is_whitespace()) {
-                    return "";
+                if line.all(|u| u.is_ascii_whitespace()) {
+                    return Vec::new();
                 }
-                match char_indices.find(|(_, c)| c.is_whitespace()) {
-                    None => return "",
-                    Some((next_field_i, _)) => i = next_field_i,
+                line_after_skipped_field = line
+                    .by_ref()
+                    .skip_while(|u| !u.is_ascii_whitespace())
+                    .copied()
+                    .collect::<Vec<u8>>();
+
+                if line_after_skipped_field.is_empty() {
+                    return Vec::new();
                 }
+                line = line_after_skipped_field.iter();
             }
-            &line[i..]
+            line.copied().collect::<Vec<u8>>()
         } else {
-            line
+            line.to_vec()
         }
     }
 
@@ -138,15 +145,15 @@ impl Uniq {
         }
     }
 
-    fn cmp_keys(&self, first: &str, second: &str) -> bool {
+    fn cmp_keys(&self, first: &[u8], second: &[u8]) -> bool {
         self.cmp_key(first, |first_iter| {
             self.cmp_key(second, |second_iter| first_iter.ne(second_iter))
         })
     }
 
-    fn cmp_key<F>(&self, line: &str, mut closure: F) -> bool
+    fn cmp_key<F>(&self, line: &[u8], mut closure: F) -> bool
     where
-        F: FnMut(&mut dyn Iterator<Item = char>) -> bool,
+        F: FnMut(&mut dyn Iterator<Item = u8>) -> bool,
     {
         let fields_to_check = self.skip_fields(line);
         let len = fields_to_check.len();
@@ -155,28 +162,34 @@ impl Uniq {
         if len > 0 {
             // fast path: avoid doing any work if there is no need to skip or map to lower-case
             if !self.ignore_case && slice_start == 0 && slice_stop == len {
-                return closure(&mut fields_to_check.chars());
+                return closure(&mut fields_to_check.iter().copied());
             }
 
             // fast path: avoid skipping
             if self.ignore_case && slice_start == 0 && slice_stop == len {
-                return closure(&mut fields_to_check.chars().flat_map(char::to_uppercase));
+                return closure(&mut fields_to_check.iter().map(|u| u.to_ascii_lowercase()));
             }
 
-            // fast path: we can avoid mapping chars to upper-case, if we don't want to ignore the case
+            // fast path: we can avoid mapping chars to lower-case, if we don't want to ignore the case
             if !self.ignore_case {
-                return closure(&mut fields_to_check.chars().skip(slice_start).take(slice_stop));
+                return closure(
+                    &mut fields_to_check
+                        .iter()
+                        .skip(slice_start)
+                        .take(slice_stop)
+                        .copied(),
+                );
             }
 
             closure(
                 &mut fields_to_check
-                    .chars()
+                    .iter()
                     .skip(slice_start)
                     .take(slice_stop)
-                    .flat_map(char::to_uppercase),
+                    .map(|u| u.to_ascii_lowercase()),
             )
         } else {
-            closure(&mut fields_to_check.chars())
+            closure(&mut fields_to_check.iter().copied())
         }
     }
 
@@ -196,7 +209,7 @@ impl Uniq {
     fn print_line(
         &self,
         writer: &mut impl Write,
-        line: &str,
+        line: &[u8],
         count: usize,
         first_line_printed: bool,
     ) -> UResult<()> {
@@ -207,9 +220,16 @@ impl Uniq {
         }
 
         if self.show_counts {
-            write!(writer, "{count:7} {line}")
+            let prefix = format!("{count:7} ");
+            let out = prefix
+                .as_bytes()
+                .iter()
+                .chain(line.iter())
+                .copied()
+                .collect::<Vec<u8>>();
+            writer.write_all(out.as_slice())
         } else {
-            writer.write_all(line.as_bytes())
+            writer.write_all(line)
         }
         .map_err_context(|| "Failed to write line".to_string())?;
 
@@ -217,66 +237,328 @@ impl Uniq {
     }
 }
 
-fn get_line_string(io_line: io::Result<Vec<u8>>) -> UResult<String> {
-    let line_bytes = io_line.map_err_context(|| "failed to split lines".to_string())?;
-    String::from_utf8(line_bytes)
-        .map_err(|e| USimpleError::new(1, format!("failed to convert line to utf8: {e}")))
+fn opt_parsed(opt_name: &str, matches: &ArgMatches) -> UResult<Option<usize>> {
+    match matches.get_one::<String>(opt_name) {
+        Some(arg_str) => match arg_str.parse::<usize>() {
+            Ok(v) => Ok(Some(v)),
+            Err(e) => match e.kind() {
+                IntErrorKind::PosOverflow => Ok(Some(usize::MAX)),
+                _ => Err(USimpleError::new(
+                    1,
+                    format!(
+                        "Invalid argument for {}: {}",
+                        opt_name,
+                        arg_str.maybe_quote()
+                    ),
+                )),
+            },
+        },
+        None => Ok(None),
+    }
 }
 
-fn opt_parsed<T: FromStr>(opt_name: &str, matches: &ArgMatches) -> UResult<Option<T>> {
-    Ok(match matches.get_one::<String>(opt_name) {
-        Some(arg_str) => Some(arg_str.parse().map_err(|_| {
-            USimpleError::new(
-                1,
-                format!(
-                    "Invalid argument for {}: {}",
-                    opt_name,
-                    arg_str.maybe_quote()
-                ),
-            )
-        })?),
-        None => None,
-    })
-}
-
-/// Gets number of fields to be skipped from the shorthand option `-N`
+/// Extract obsolete shorthands (if any) for skip fields and skip chars options
+/// following GNU `uniq` behavior
 ///
-/// ```bash
-/// uniq -12345
-/// ```
-/// the first digit isn't interpreted by clap as part of the value
-/// so `get_one()` would return `2345`, then to get the actual value
-/// we loop over every possible first digit, only one of which can be
-/// found in the command line because they conflict with each other,
-/// append the value to it and parse the resulting string as usize,
-/// an error at this point means that a character that isn't a digit was given
-fn obsolete_skip_field(matches: &ArgMatches) -> UResult<Option<usize>> {
-    for opt_text in OBSOLETE_SKIP_FIELDS_DIGITS {
-        let argument = matches.get_one::<String>(opt_text);
-        if matches.contains_id(opt_text) {
-            let mut full = opt_text.to_owned();
-            if let Some(ar) = argument {
-                full.push_str(ar);
-            }
-            let value = full.parse::<usize>();
+/// Examples for obsolete skip fields option
+/// `uniq -1 file` would equal `uniq -f1 file`
+/// `uniq -1 -2 -3 file` would equal `uniq -f123 file`
+/// `uniq -1 -2 -f5 file` would equal `uniq -f5 file`
+/// `uniq -u20s4 file` would equal `uniq -u -f20 -s4 file`
+/// `uniq -D1w3 -3 file` would equal `uniq -D -f3 -w3 file`
+///
+/// Examples for obsolete skip chars option
+/// `uniq +1 file` would equal `uniq -s1 file`
+/// `uniq +1 -s2 file` would equal `uniq -s2 file`
+/// `uniq -s2 +3 file` would equal `uniq -s3 file`
+///
+fn handle_obsolete(args: impl uucore::Args) -> (Vec<OsString>, Option<usize>, Option<usize>) {
+    let mut skip_fields_old = None;
+    let mut skip_chars_old = None;
+    let mut preceding_long_opt_req_value = false;
+    let mut preceding_short_opt_req_value = false;
 
-            if let Ok(val) = value {
-                return Ok(Some(val));
-            } else {
-                return Err(USimpleError {
-                    code: 1,
-                    message: format!("Invalid argument for skip-fields: {}", full),
-                }
-                .into());
+    let filtered_args = args
+        .filter_map(|os_slice| {
+            filter_args(
+                os_slice,
+                &mut skip_fields_old,
+                &mut skip_chars_old,
+                &mut preceding_long_opt_req_value,
+                &mut preceding_short_opt_req_value,
+            )
+        })
+        .collect();
+
+    // exacted String values (if any) for skip_fields_old and skip_chars_old
+    // are guaranteed to consist of ascii digit chars only at this point
+    // so, it is safe to parse into usize and collapse Result into Option
+    let skip_fields_old: Option<usize> = skip_fields_old.and_then(|v| v.parse::<usize>().ok());
+    let skip_chars_old: Option<usize> = skip_chars_old.and_then(|v| v.parse::<usize>().ok());
+
+    (filtered_args, skip_fields_old, skip_chars_old)
+}
+
+fn filter_args(
+    os_slice: OsString,
+    skip_fields_old: &mut Option<String>,
+    skip_chars_old: &mut Option<String>,
+    preceding_long_opt_req_value: &mut bool,
+    preceding_short_opt_req_value: &mut bool,
+) -> Option<OsString> {
+    let filter: Option<OsString>;
+    if let Some(slice) = os_slice.to_str() {
+        if should_extract_obs_skip_fields(
+            slice,
+            preceding_long_opt_req_value,
+            preceding_short_opt_req_value,
+        ) {
+            // start of the short option string
+            // that can have obsolete skip fields option value in it
+            filter = handle_extract_obs_skip_fields(slice, skip_fields_old);
+        } else if should_extract_obs_skip_chars(
+            slice,
+            preceding_long_opt_req_value,
+            preceding_short_opt_req_value,
+        ) {
+            // the obsolete skip chars option
+            filter = handle_extract_obs_skip_chars(slice, skip_chars_old);
+        } else {
+            // either not a short option
+            // or a short option that cannot have obsolete lines value in it
+            filter = Some(OsString::from(slice));
+            // Check and reset to None obsolete values extracted so far
+            // if corresponding new/documented options are encountered next.
+            // NOTE: For skip fields - occurrences of corresponding new/documented options
+            // inside combined short options ike '-u20s4' or '-D1w3', etc
+            // are also covered in `handle_extract_obs_skip_fields()` function
+            if slice.starts_with("-f") {
+                *skip_fields_old = None;
+            }
+            if slice.starts_with("-s") {
+                *skip_chars_old = None;
             }
         }
+        handle_preceding_options(
+            slice,
+            preceding_long_opt_req_value,
+            preceding_short_opt_req_value,
+        );
+    } else {
+        // Cannot cleanly convert os_slice to UTF-8
+        // Do not process and return as-is
+        // This will cause failure later on, but we should not handle it here
+        // and let clap panic on invalid UTF-8 argument
+        filter = Some(os_slice);
     }
-    Ok(None)
+    filter
+}
+
+/// Helper function to [`filter_args`]
+/// Checks if the slice is a true short option (and not hyphen prefixed value of an option)
+/// and if so, a short option that can contain obsolete skip fields value
+fn should_extract_obs_skip_fields(
+    slice: &str,
+    preceding_long_opt_req_value: &bool,
+    preceding_short_opt_req_value: &bool,
+) -> bool {
+    slice.starts_with('-')
+        && !slice.starts_with("--")
+        && !preceding_long_opt_req_value
+        && !preceding_short_opt_req_value
+        && !slice.starts_with("-s")
+        && !slice.starts_with("-f")
+        && !slice.starts_with("-w")
+}
+
+/// Helper function to [`filter_args`]
+/// Checks if the slice is a true obsolete skip chars short option
+fn should_extract_obs_skip_chars(
+    slice: &str,
+    preceding_long_opt_req_value: &bool,
+    preceding_short_opt_req_value: &bool,
+) -> bool {
+    slice.starts_with('+')
+        && posix_version().is_some_and(|v| v <= OBSOLETE)
+        && !preceding_long_opt_req_value
+        && !preceding_short_opt_req_value
+        && slice.chars().nth(1).map_or(false, |c| c.is_ascii_digit())
+}
+
+/// Helper function to [`filter_args`]
+/// Captures if current slice is a preceding option
+/// that requires value
+fn handle_preceding_options(
+    slice: &str,
+    preceding_long_opt_req_value: &mut bool,
+    preceding_short_opt_req_value: &mut bool,
+) {
+    // capture if current slice is a preceding long option that requires value and does not use '=' to assign that value
+    // following slice should be treaded as value for this option
+    // even if it starts with '-' (which would be treated as hyphen prefixed value)
+    if slice.starts_with("--") {
+        use options as O;
+        *preceding_long_opt_req_value = &slice[2..] == O::SKIP_CHARS
+            || &slice[2..] == O::SKIP_FIELDS
+            || &slice[2..] == O::CHECK_CHARS
+            || &slice[2..] == O::GROUP
+            || &slice[2..] == O::ALL_REPEATED;
+    }
+    // capture if current slice is a preceding short option that requires value and does not have value in the same slice (value separated by whitespace)
+    // following slice should be treaded as value for this option
+    // even if it starts with '-' (which would be treated as hyphen prefixed value)
+    *preceding_short_opt_req_value = slice == "-s" || slice == "-f" || slice == "-w";
+    // slice is a value
+    // reset preceding option flags
+    if !slice.starts_with('-') {
+        *preceding_short_opt_req_value = false;
+        *preceding_long_opt_req_value = false;
+    }
+}
+
+/// Helper function to [`filter_args`]
+/// Extracts obsolete skip fields numeric part from argument slice
+/// and filters it out
+fn handle_extract_obs_skip_fields(
+    slice: &str,
+    skip_fields_old: &mut Option<String>,
+) -> Option<OsString> {
+    let mut obs_extracted: Vec<char> = vec![];
+    let mut obs_end_reached = false;
+    let mut obs_overwritten_by_new = false;
+    let filtered_slice: Vec<char> = slice
+        .chars()
+        .filter(|c| {
+            if c.eq(&'f') {
+                // any extracted obsolete skip fields value up to this point should be discarded
+                // as the new/documented option for skip fields was used after it
+                // i.e. in situation like `-u12f3`
+                // The obsolete skip fields value should still be extracted, filtered out
+                // but the skip_fields_old should be set to None instead of Some(String) later on
+                obs_overwritten_by_new = true;
+            }
+            // To correctly process scenario like '-u20s4' or '-D1w3', etc
+            // we need to stop extracting digits once alphabetic character is encountered
+            // after we already have something in obs_extracted
+            if c.is_ascii_digit() && !obs_end_reached {
+                obs_extracted.push(*c);
+                false
+            } else {
+                if !obs_extracted.is_empty() {
+                    obs_end_reached = true;
+                }
+                true
+            }
+        })
+        .collect();
+
+    if obs_extracted.is_empty() {
+        // no obsolete value found/extracted
+        Some(OsString::from(slice))
+    } else {
+        // obsolete value was extracted
+        // unless there was new/documented option for skip fields used after it
+        // set the skip_fields_old value (concatenate to it if there was a value there already)
+        if obs_overwritten_by_new {
+            *skip_fields_old = None;
+        } else {
+            let mut extracted: String = obs_extracted.iter().collect();
+            if let Some(val) = skip_fields_old {
+                extracted.push_str(val);
+            }
+            *skip_fields_old = Some(extracted);
+        }
+        if filtered_slice.get(1).is_some() {
+            // there were some short options in front of or after obsolete lines value
+            // i.e. '-u20s4' or '-D1w3' or similar, which after extraction of obsolete lines value
+            // would look like '-us4' or '-Dw3' or similar
+            let filtered_slice: String = filtered_slice.iter().collect();
+            Some(OsString::from(filtered_slice))
+        } else {
+            None
+        }
+    }
+}
+
+/// Helper function to [`filter_args`]
+/// Extracts obsolete skip chars numeric part from argument slice
+fn handle_extract_obs_skip_chars(
+    slice: &str,
+    skip_chars_old: &mut Option<String>,
+) -> Option<OsString> {
+    let mut obs_extracted: Vec<char> = vec![];
+    let mut slice_chars = slice.chars();
+    slice_chars.next(); // drop leading '+' character
+    for c in slice_chars {
+        if c.is_ascii_digit() {
+            obs_extracted.push(c);
+        } else {
+            // for obsolete skip chars option the whole value after '+' should be numeric
+            // so, if any non-digit characters are encountered in the slice (i.e. `+1q`, etc)
+            // set skip_chars_old to None and return whole slice back.
+            // It will be parsed by clap and panic with appropriate error message
+            *skip_chars_old = None;
+            return Some(OsString::from(slice));
+        }
+    }
+    if obs_extracted.is_empty() {
+        // no obsolete value found/extracted
+        // i.e. it was just '+' character alone
+        Some(OsString::from(slice))
+    } else {
+        // successfully extracted numeric value
+        // capture it and return None to filter out the whole slice
+        *skip_chars_old = Some(obs_extracted.iter().collect());
+        None
+    }
+}
+
+/// Maps Clap errors to USimpleError and overrides 3 specific ones
+/// to meet requirements of GNU tests for `uniq`.
+/// Unfortunately these overrides are necessary, since several GNU tests
+/// for `uniq` hardcode and require the exact wording of the error message
+/// and it is not compatible with how Clap formats and displays those error messages.
+fn map_clap_errors(clap_error: &Error) -> Box<dyn UError> {
+    let footer = "Try 'uniq --help' for more information.";
+    let override_arg_conflict =
+        "--group is mutually exclusive with -c/-d/-D/-u\n".to_string() + footer;
+    let override_group_badoption = "invalid argument 'badoption' for '--group'\nValid arguments are:\n  - 'prepend'\n  - 'append'\n  - 'separate'\n  - 'both'\n".to_string() + footer;
+    let override_all_repeated_badoption = "invalid argument 'badoption' for '--all-repeated'\nValid arguments are:\n  - 'none'\n  - 'prepend'\n  - 'separate'\n".to_string() + footer;
+
+    let error_message = match clap_error.kind() {
+        ErrorKind::ArgumentConflict => override_arg_conflict,
+        ErrorKind::InvalidValue
+            if clap_error
+                .get(ContextKind::InvalidValue)
+                .is_some_and(|v| v.to_string() == "badoption")
+                && clap_error
+                    .get(ContextKind::InvalidArg)
+                    .is_some_and(|v| v.to_string().starts_with("--group")) =>
+        {
+            override_group_badoption
+        }
+        ErrorKind::InvalidValue
+            if clap_error
+                .get(ContextKind::InvalidValue)
+                .is_some_and(|v| v.to_string() == "badoption")
+                && clap_error
+                    .get(ContextKind::InvalidArg)
+                    .is_some_and(|v| v.to_string().starts_with("--all-repeated")) =>
+        {
+            override_all_repeated_badoption
+        }
+        _ => clap_error.to_string(),
+    };
+    USimpleError::new(1, error_message)
 }
 
 #[uucore::main]
 pub fn uumain(args: impl uucore::Args) -> UResult<()> {
-    let matches = uu_app().after_help(AFTER_HELP).try_get_matches_from(args)?;
+    let (args, skip_fields_old, skip_chars_old) = handle_obsolete(args);
+
+    let matches = uu_app()
+        .try_get_matches_from(args)
+        .map_err(|e| map_clap_errors(&e))?;
 
     let files = matches.get_many::<OsString>(ARG_FILES);
 
@@ -286,8 +568,7 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
         .unwrap_or_default();
 
     let skip_fields_modern: Option<usize> = opt_parsed(options::SKIP_FIELDS, &matches)?;
-
-    let skip_fields_old: Option<usize> = obsolete_skip_field(&matches)?;
+    let skip_chars_modern: Option<usize> = opt_parsed(options::SKIP_CHARS, &matches)?;
 
     let uniq = Uniq {
         repeats_only: matches.get_flag(options::REPEATED)
@@ -298,16 +579,16 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
         delimiters: get_delimiter(&matches),
         show_counts: matches.get_flag(options::COUNT),
         skip_fields: skip_fields_modern.or(skip_fields_old),
-        slice_start: opt_parsed(options::SKIP_CHARS, &matches)?,
+        slice_start: skip_chars_modern.or(skip_chars_old),
         slice_stop: opt_parsed(options::CHECK_CHARS, &matches)?,
         ignore_case: matches.get_flag(options::IGNORE_CASE),
         zero_terminated: matches.get_flag(options::ZERO_TERMINATED),
     };
 
     if uniq.show_counts && uniq.all_repeated {
-        return Err(UUsageError::new(
+        return Err(USimpleError::new(
             1,
-            "printing all duplicated lines and repeat counts is meaningless",
+            "printing all duplicated lines and repeat counts is meaningless\nTry 'uniq --help' for more information.",
         ));
     }
 
@@ -318,11 +599,12 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
 }
 
 pub fn uu_app() -> Command {
-    let mut cmd = Command::new(uucore::util_name())
+    Command::new(uucore::util_name())
         .version(crate_version!())
         .about(ABOUT)
         .override_usage(format_usage(USAGE))
         .infer_long_args(true)
+        .after_help(AFTER_HELP)
         .arg(
             Arg::new(options::ALL_REPEATED)
                 .short('D')
@@ -356,6 +638,7 @@ pub fn uu_app() -> Command {
                     options::REPEATED,
                     options::ALL_REPEATED,
                     options::UNIQUE,
+                    options::COUNT
                 ]),
         )
         .arg(
@@ -397,7 +680,6 @@ pub fn uu_app() -> Command {
             Arg::new(options::SKIP_FIELDS)
                 .short('f')
                 .long(options::SKIP_FIELDS)
-                .overrides_with_all(OBSOLETE_SKIP_FIELDS_DIGITS)
                 .help("avoid comparing the first N fields")
                 .value_name("N"),
         )
@@ -415,42 +697,14 @@ pub fn uu_app() -> Command {
                 .help("end lines with 0 byte, not newline")
                 .action(ArgAction::SetTrue),
         )
-        .group(
-            // in GNU `uniq` every every digit of these arguments
-            // would be interpreted as a simple flag,
-            // these flags then are concatenated to get
-            // the number of fields to skip.
-            // in this way `uniq -1 -z -2` would be
-            // equal to `uniq -12 -q`, since this behavior
-            // is counterintuitive and it's hard to do in clap
-            // we handle it more like GNU `fold`: we have a flag
-            // for each possible initial digit, that takes the
-            // rest of the value as argument.
-            // we disallow explicitly multiple occurrences
-            // because then it would have a different behavior
-            // from GNU
-            ArgGroup::new(options::OBSOLETE_SKIP_FIELDS)
-                .multiple(false)
-                .args(OBSOLETE_SKIP_FIELDS_DIGITS)
-        )
         .arg(
             Arg::new(ARG_FILES)
                 .action(ArgAction::Append)
                 .value_parser(ValueParser::os_string())
                 .num_args(0..=2)
+                .hide(true)
                 .value_hint(clap::ValueHint::FilePath),
-        );
-
-    for i in OBSOLETE_SKIP_FIELDS_DIGITS {
-        cmd = cmd.arg(
-            Arg::new(i)
-                .short(i.chars().next().unwrap())
-                .num_args(0..=1)
-                .hide(true),
-        );
-    }
-
-    cmd
+        )
 }
 
 fn get_delimiter(matches: &ArgMatches) -> Delimiters {
diff --git a/src/uucore/src/lib/lib.rs b/src/uucore/src/lib/lib.rs
index 6f8400589..9557dcc76 100644
--- a/src/uucore/src/lib/lib.rs
+++ b/src/uucore/src/lib/lib.rs
@@ -25,6 +25,7 @@ pub use crate::mods::error;
 pub use crate::mods::line_ending;
 pub use crate::mods::os;
 pub use crate::mods::panic;
+pub use crate::mods::posix;
 
 // * string parsing modules
 pub use crate::parser::parse_glob;
diff --git a/src/uucore/src/lib/mods.rs b/src/uucore/src/lib/mods.rs
index 986536d6d..40b5046f2 100644
--- a/src/uucore/src/lib/mods.rs
+++ b/src/uucore/src/lib/mods.rs
@@ -9,3 +9,4 @@ pub mod error;
 pub mod line_ending;
 pub mod os;
 pub mod panic;
+pub mod posix;
diff --git a/src/uucore/src/lib/mods/posix.rs b/src/uucore/src/lib/mods/posix.rs
new file mode 100644
index 000000000..662880f84
--- /dev/null
+++ b/src/uucore/src/lib/mods/posix.rs
@@ -0,0 +1,52 @@
+// This file is part of the uutils coreutils package.
+//
+// For the full copyright and license information, please view the LICENSE
+// file that was distributed with this source code.
+// spell-checker:ignore (vars)
+//! Iterate over lines, including the line ending character(s).
+//!
+//! This module provides the [`posix_version`] function, that returns
+//! Some(usize) if the `_POSIX2_VERSION` environment variable is defined
+//! and has value that can be parsed.
+//! Otherwise returns None, so the calling utility would assume default behavior.
+//!
+//! NOTE: GNU (as of v9.4) recognizes three distinct values for POSIX version:
+//! '199209' for POSIX 1003.2-1992, which would define Obsolete mode
+//! '200112' for POSIX 1003.1-2001, which is the minimum version for Traditional mode
+//! '200809' for POSIX 1003.1-2008, which is the minimum version for Modern mode
+//!
+//! Utilities that rely on this module:
+//! `sort` (TBD)
+//! `tail` (TBD)
+//! `touch` (TBD)
+//! `uniq`
+//!
+use std::env;
+
+pub const OBSOLETE: usize = 199209;
+pub const TRADITIONAL: usize = 200112;
+pub const MODERN: usize = 200809;
+
+pub fn posix_version() -> Option<usize> {
+    env::var("_POSIX2_VERSION")
+        .ok()
+        .and_then(|v| v.parse::<usize>().ok())
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::posix::*;
+
+    #[test]
+    fn test_posix_version() {
+        // default
+        assert_eq!(posix_version(), None);
+        // set specific version
+        env::set_var("_POSIX2_VERSION", OBSOLETE.to_string());
+        assert_eq!(posix_version(), Some(OBSOLETE));
+        env::set_var("_POSIX2_VERSION", TRADITIONAL.to_string());
+        assert_eq!(posix_version(), Some(TRADITIONAL));
+        env::set_var("_POSIX2_VERSION", MODERN.to_string());
+        assert_eq!(posix_version(), Some(MODERN));
+    }
+}
diff --git a/tests/by-util/test_uniq.rs b/tests/by-util/test_uniq.rs
index aa41de827..7ebc5c482 100644
--- a/tests/by-util/test_uniq.rs
+++ b/tests/by-util/test_uniq.rs
@@ -2,10 +2,10 @@
 //
 // For the full copyright and license information, please view the LICENSE
 // file that was distributed with this source code.
-use std::io::Write;
 
-// spell-checker:ignore nabcd
+// spell-checker:ignore nabcd badoption schar
 use crate::common::util::TestScenario;
+use uucore::posix::OBSOLETE;
 
 static INPUT: &str = "sorted.txt";
 static OUTPUT: &str = "sorted-output.txt";
@@ -118,10 +118,10 @@ fn test_stdin_skip_21_fields_obsolete() {
 #[test]
 fn test_stdin_skip_invalid_fields_obsolete() {
     new_ucmd!()
-        .args(&["-5deadbeef"])
+        .args(&["-5q"])
         .run()
         .failure()
-        .stderr_only("uniq: Invalid argument for skip-fields: 5deadbeef\n");
+        .stderr_contains("error: unexpected argument '-q' found\n");
 }
 
 #[test]
@@ -138,8 +138,7 @@ fn test_all_repeated_followed_by_filename() {
     let filename = "test.txt";
     let (at, mut ucmd) = at_and_ucmd!();
 
-    let mut file = at.make_file(filename);
-    file.write_all(b"a\na\n").unwrap();
+    at.write(filename, "a\na\n");
 
     ucmd.args(&["--all-repeated", filename])
         .run()
@@ -202,14 +201,13 @@ fn test_stdin_zero_terminated() {
 }
 
 #[test]
-fn test_invalid_utf8() {
+fn test_gnu_locale_fr_schar() {
     new_ucmd!()
-        .arg("not-utf8-sequence.txt")
+        .args(&["-f1", "locale-fr-schar.txt"])
+        .env("LC_ALL", "C")
         .run()
-        .failure()
-        .stderr_only(
-        "uniq: failed to convert line to utf8: invalid utf-8 sequence of 1 bytes from index 0\n",
-    );
+        .success()
+        .stdout_is_fixture_bytes("locale-fr-schar.txt");
 }
 
 #[test]
@@ -226,8 +224,7 @@ fn test_group_followed_by_filename() {
     let filename = "test.txt";
     let (at, mut ucmd) = at_and_ucmd!();
 
-    let mut file = at.make_file(filename);
-    file.write_all(b"a\na\n").unwrap();
+    at.write(filename, "a\na\n");
 
     ucmd.args(&["--group", filename])
         .run()
@@ -521,23 +518,23 @@ fn gnu_tests() {
             stderr: None,
             exit: None,
         },
-        // // Obsolete syntax for "-s 1"
-        // TestCase {
-        //     name: "obs-plus40",
-        //     args: &["+1"],
-        //     input: "aaa\naaa\n",
-        //     stdout: Some("aaa\n"),
-        //     stderr: None,
-        //     exit: None,
-        // },
-        // TestCase {
-        //     name: "obs-plus41",
-        //     args: &["+1"],
-        //     input: "baa\naaa\n",
-        //     stdout: Some("baa\n"),
-        //     stderr: None,
-        //     exit: None,
-        // },
+        // Obsolete syntax for "-s 1"
+        TestCase {
+            name: "obs-plus40",
+            args: &["+1"],
+            input: "aaa\naaa\n",
+            stdout: Some("aaa\n"),
+            stderr: None,
+            exit: None,
+        },
+        TestCase {
+            name: "obs-plus41",
+            args: &["+1"],
+            input: "baa\naaa\n",
+            stdout: Some("baa\n"),
+            stderr: None,
+            exit: None,
+        },
         TestCase {
             name: "42",
             args: &["-s", "1"],
@@ -554,7 +551,6 @@ fn gnu_tests() {
             stderr: None,
             exit: None,
         },
-        /*
         // Obsolete syntax for "-s 1"
         TestCase {
             name: "obs-plus44",
@@ -572,7 +568,6 @@ fn gnu_tests() {
             stderr: None,
             exit: None,
         },
-        */
         TestCase {
             name: "50",
             args: &["-f", "1", "-s", "1"],
@@ -757,17 +752,14 @@ fn gnu_tests() {
             stderr: None,
             exit: None,
         },
-        /*
-        Disable as it fails too often. See:
-        https://github.com/uutils/coreutils/issues/3509
         TestCase {
             name: "112",
             args: &["-D", "-c"],
             input: "a a\na b\n",
             stdout: Some(""),
-            stderr: Some("uniq: printing all duplicated lines and repeat counts is meaningless"),
+            stderr: Some("uniq: printing all duplicated lines and repeat counts is meaningless\nTry 'uniq --help' for more information.\n"),
             exit: Some(1),
-        },*/
+        },
         TestCase {
             name: "113",
             args: &["--all-repeated=separate"],
@@ -816,6 +808,14 @@ fn gnu_tests() {
             stderr: None,
             exit: None,
         },
+        TestCase {
+            name: "119",
+            args: &["--all-repeated=badoption"],
+            input: "a a\na b\n",
+            stdout: Some(""),
+            stderr: Some("uniq: invalid argument 'badoption' for '--all-repeated'\nValid arguments are:\n  - 'none'\n  - 'prepend'\n  - 'separate'\nTry 'uniq --help' for more information.\n"),
+            exit: Some(1),
+        },
         // \x08 is the backspace char
         TestCase {
             name: "120",
@@ -825,6 +825,16 @@ fn gnu_tests() {
             stderr: None,
             exit: None,
         },
+        // u128::MAX = 340282366920938463463374607431768211455
+        TestCase {
+            name: "121",
+            args: &["-d", "-u", "-w340282366920938463463374607431768211456"],
+            input: "a\na\n\x08",
+            stdout: Some(""),
+            stderr: None,
+            exit: None,
+        },
+        // Test 122 is the same as 121, just different big int overflow number
         TestCase {
             name: "123",
             args: &["--zero-terminated"],
@@ -969,16 +979,88 @@ fn gnu_tests() {
             stderr: None,
             exit: None,
         },
+        TestCase {
+            name: "141",
+            args: &["--group", "-c"],
+            input: "",
+            stdout: Some(""),
+            stderr: Some("uniq: --group is mutually exclusive with -c/-d/-D/-u\nTry 'uniq --help' for more information.\n"),
+            exit: Some(1),
+        },
+        TestCase {
+            name: "142",
+            args: &["--group", "-d"],
+            input: "",
+            stdout: Some(""),
+            stderr: Some("uniq: --group is mutually exclusive with -c/-d/-D/-u\nTry 'uniq --help' for more information.\n"),
+            exit: Some(1),
+        },
+        TestCase {
+            name: "143",
+            args: &["--group", "-u"],
+            input: "",
+            stdout: Some(""),
+            stderr: Some("uniq: --group is mutually exclusive with -c/-d/-D/-u\nTry 'uniq --help' for more information.\n"),
+            exit: Some(1),
+        },
+        TestCase {
+            name: "144",
+            args: &["--group", "-D"],
+            input: "",
+            stdout: Some(""),
+            stderr: Some("uniq: --group is mutually exclusive with -c/-d/-D/-u\nTry 'uniq --help' for more information.\n"),
+            exit: Some(1),
+        },
+        TestCase {
+            name: "145",
+            args: &["--group=badoption"],
+            input: "",
+            stdout: Some(""),
+            stderr: Some("uniq: invalid argument 'badoption' for '--group'\nValid arguments are:\n  - 'prepend'\n  - 'append'\n  - 'separate'\n  - 'both'\nTry 'uniq --help' for more information.\n"),
+            exit: Some(1),
+        },
     ];
 
+    // run regular version of tests with regular file as input
     for case in cases {
+        // prep input file
+        let (at, mut ucmd) = at_and_ucmd!();
+        at.write("input-file", case.input);
+
+        // first - run a version of tests with regular file as input
         eprintln!("Test {}", case.name);
-        let result = new_ucmd!().args(case.args).run_piped_stdin(case.input);
+        // set environment variable for obsolete skip char option tests
+        if case.name.starts_with("obs-plus") {
+            ucmd.env("_POSIX2_VERSION", OBSOLETE.to_string());
+        }
+        let result = ucmd.args(case.args).arg("input-file").run();
         if let Some(stdout) = case.stdout {
             result.stdout_is(stdout);
         }
         if let Some(stderr) = case.stderr {
-            result.stderr_contains(stderr);
+            result.stderr_is(stderr);
+        }
+        if let Some(exit) = case.exit {
+            result.code_is(exit);
+        }
+
+        // then - ".stdin" version of tests with input piped in
+        // NOTE: GNU has another variant for stdin redirect from a file
+        // as in `uniq < input-file`
+        // For now we treat it as equivalent of piped in stdin variant
+        // as in `cat input-file | uniq`
+        eprintln!("Test {}.stdin", case.name);
+        // set environment variable for obsolete skip char option tests
+        let mut ucmd = new_ucmd!();
+        if case.name.starts_with("obs-plus") {
+            ucmd.env("_POSIX2_VERSION", OBSOLETE.to_string());
+        }
+        let result = ucmd.args(case.args).run_piped_stdin(case.input);
+        if let Some(stdout) = case.stdout {
+            result.stdout_is(stdout);
+        }
+        if let Some(stderr) = case.stderr {
+            result.stderr_is(stderr);
         }
         if let Some(exit) = case.exit {
             result.code_is(exit);
diff --git a/tests/fixtures/uniq/locale-fr-schar.txt b/tests/fixtures/uniq/locale-fr-schar.txt
new file mode 100644
index 000000000..4e285f37a
--- /dev/null
+++ b/tests/fixtures/uniq/locale-fr-schar.txt
@@ -0,0 +1,2 @@
+ y z
+� y z