1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-27 11:07:44 +00:00

env: support string args by "-S", "-vS" or "--split-strings"

This commit is contained in:
Ulrich Hornung 2024-03-14 19:38:28 +01:00
parent 6f95d058a2
commit c62ba559d0
14 changed files with 2587 additions and 207 deletions

View file

@ -25,6 +25,7 @@ sudoedit
tcsh tcsh
tzselect tzselect
urandom urandom
VARNAME
wtmp wtmp
zsh zsh

36
Cargo.lock generated
View file

@ -547,7 +547,7 @@ dependencies = [
"lazy_static", "lazy_static",
"proc-macro2", "proc-macro2",
"regex", "regex",
"syn 2.0.23", "syn 2.0.32",
"unicode-xid", "unicode-xid",
] ]
@ -559,7 +559,7 @@ checksum = "3e1a2532e4ed4ea13031c13bc7bc0dbca4aae32df48e9d77f0d1e743179f2ea1"
dependencies = [ dependencies = [
"lazy_static", "lazy_static",
"proc-macro2", "proc-macro2",
"syn 2.0.23", "syn 2.0.32",
] ]
[[package]] [[package]]
@ -574,7 +574,7 @@ dependencies = [
"lazy_static", "lazy_static",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.23", "syn 2.0.32",
] ]
[[package]] [[package]]
@ -942,7 +942,7 @@ checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.23", "syn 2.0.32",
] ]
[[package]] [[package]]
@ -1791,7 +1791,7 @@ dependencies = [
"regex", "regex",
"relative-path", "relative-path",
"rustc_version", "rustc_version",
"syn 2.0.23", "syn 2.0.32",
"unicode-ident", "unicode-ident",
] ]
@ -1902,9 +1902,23 @@ checksum = "e25dfac463d778e353db5be2449d1cce89bd6fd23c9f1ea21310ce6e5a1b29c4"
[[package]] [[package]]
name = "serde" name = "serde"
version = "1.0.147" version = "1.0.193"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d193d69bae983fc11a79df82342761dfbf28a99fc8d203dca4c3c1b590948965" checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.193"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.32",
]
[[package]] [[package]]
name = "sha1" name = "sha1"
@ -2039,9 +2053,9 @@ dependencies = [
[[package]] [[package]]
name = "syn" name = "syn"
version = "2.0.23" version = "2.0.32"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59fb7d6d8281a51045d62b8eb3a7d1ce347b76f312af50cd3dc0af39c87c1737" checksum = "239814284fd6f1a4ffe4ca893952cdd93c224b6a1571c9a9eadd670295c0c9e2"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@ -3296,7 +3310,7 @@ dependencies = [
"once_cell", "once_cell",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.23", "syn 2.0.32",
"wasm-bindgen-shared", "wasm-bindgen-shared",
] ]
@ -3318,7 +3332,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.23", "syn 2.0.32",
"wasm-bindgen-backend", "wasm-bindgen-backend",
"wasm-bindgen-shared", "wasm-bindgen-shared",
] ]

272
src/uu/env/src/env.rs vendored
View file

@ -5,18 +5,32 @@
// spell-checker:ignore (ToDO) chdir execvp progname subcommand subcommands unsets setenv putenv spawnp SIGSEGV SIGBUS sigaction // spell-checker:ignore (ToDO) chdir execvp progname subcommand subcommands unsets setenv putenv spawnp SIGSEGV SIGBUS sigaction
pub mod native_int_str;
pub mod parse_error;
pub mod split_iterator;
pub mod string_expander;
pub mod string_parser;
pub mod variable_parser;
use clap::builder::ValueParser;
use clap::{crate_name, crate_version, Arg, ArgAction, Command}; use clap::{crate_name, crate_version, Arg, ArgAction, Command};
use ini::Ini; use ini::Ini;
use native_int_str::{
from_native_int_representation_owned, Convert, NCvt, NativeIntStr, NativeIntString, NativeStr,
};
#[cfg(unix)] #[cfg(unix)]
use nix::sys::signal::{raise, sigaction, SaFlags, SigAction, SigHandler, SigSet, Signal}; use nix::sys::signal::{raise, sigaction, SaFlags, SigAction, SigHandler, SigSet, Signal};
use std::borrow::Cow; use std::borrow::Cow;
use std::env; use std::env;
use std::ffi::{OsStr, OsString};
use std::io::{self, Write}; use std::io::{self, Write};
use std::ops::Deref;
#[cfg(unix)] #[cfg(unix)]
use std::os::unix::process::ExitStatusExt; use std::os::unix::process::ExitStatusExt;
use std::process; use std::process::{self};
use uucore::display::Quotable; use uucore::display::Quotable;
use uucore::error::{UClapError, UResult, USimpleError, UUsageError}; use uucore::error::{ExitCode, UError, UResult, USimpleError, UUsageError};
use uucore::line_ending::LineEnding; use uucore::line_ending::LineEnding;
use uucore::{format_usage, help_about, help_section, help_usage, show_warning}; use uucore::{format_usage, help_about, help_section, help_usage, show_warning};
@ -24,14 +38,16 @@ const ABOUT: &str = help_about!("env.md");
const USAGE: &str = help_usage!("env.md"); const USAGE: &str = help_usage!("env.md");
const AFTER_HELP: &str = help_section!("after help", "env.md"); const AFTER_HELP: &str = help_section!("after help", "env.md");
const ERROR_MSG_S_SHEBANG: &str = "use -[v]S to pass options in shebang lines";
struct Options<'a> { struct Options<'a> {
ignore_env: bool, ignore_env: bool,
line_ending: LineEnding, line_ending: LineEnding,
running_directory: Option<&'a str>, running_directory: Option<&'a OsStr>,
files: Vec<&'a str>, files: Vec<&'a OsStr>,
unsets: Vec<&'a str>, unsets: Vec<&'a OsStr>,
sets: Vec<(&'a str, &'a str)>, sets: Vec<(Cow<'a, OsStr>, Cow<'a, OsStr>)>,
program: Vec<&'a str>, program: Vec<&'a OsStr>,
} }
// print name=value env pairs on screen // print name=value env pairs on screen
@ -44,13 +60,13 @@ fn print_env(line_ending: LineEnding) {
} }
} }
fn parse_name_value_opt<'a>(opts: &mut Options<'a>, opt: &'a str) -> UResult<bool> { fn parse_name_value_opt<'a>(opts: &mut Options<'a>, opt: &'a OsStr) -> UResult<bool> {
// is it a NAME=VALUE like opt ? // is it a NAME=VALUE like opt ?
if let Some(idx) = opt.find('=') { let wrap = NativeStr::<'a>::new(opt);
let split_o = wrap.split_once(&'=');
if let Some((name, value)) = split_o {
// yes, so push name, value pair // yes, so push name, value pair
let (name, value) = opt.split_at(idx); opts.sets.push((name, value));
opts.sets.push((name, &value['='.len_utf8()..]));
Ok(false) Ok(false)
} else { } else {
// no, it's a program-like opt // no, it's a program-like opt
@ -58,7 +74,7 @@ fn parse_name_value_opt<'a>(opts: &mut Options<'a>, opt: &'a str) -> UResult<boo
} }
} }
fn parse_program_opt<'a>(opts: &mut Options<'a>, opt: &'a str) -> UResult<()> { fn parse_program_opt<'a>(opts: &mut Options<'a>, opt: &'a OsStr) -> UResult<()> {
if opts.line_ending == LineEnding::Nul { if opts.line_ending == LineEnding::Nul {
Err(UUsageError::new( Err(UUsageError::new(
125, 125,
@ -96,23 +112,6 @@ fn load_config_file(opts: &mut Options) -> UResult<()> {
Ok(()) Ok(())
} }
#[cfg(not(windows))]
#[allow(clippy::ptr_arg)]
fn build_command<'a, 'b>(args: &'a Vec<&'b str>) -> (Cow<'b, str>, &'a [&'b str]) {
let progname = Cow::from(args[0]);
(progname, &args[1..])
}
#[cfg(windows)]
fn build_command<'a, 'b>(args: &'a mut Vec<&'b str>) -> (Cow<'b, str>, &'a [&'b str]) {
args.insert(0, "/d/c");
let progname = env::var("ComSpec")
.map(Cow::from)
.unwrap_or_else(|_| Cow::from("cmd"));
(progname, &args[..])
}
pub fn uu_app() -> Command { pub fn uu_app() -> Command {
Command::new(crate_name!()) Command::new(crate_name!())
.version(crate_version!()) .version(crate_version!())
@ -134,6 +133,7 @@ pub fn uu_app() -> Command {
.long("chdir") .long("chdir")
.number_of_values(1) .number_of_values(1)
.value_name("DIR") .value_name("DIR")
.value_parser(ValueParser::os_string())
.value_hint(clap::ValueHint::DirPath) .value_hint(clap::ValueHint::DirPath)
.help("change working directory to DIR"), .help("change working directory to DIR"),
) )
@ -153,6 +153,7 @@ pub fn uu_app() -> Command {
.long("file") .long("file")
.value_name("PATH") .value_name("PATH")
.value_hint(clap::ValueHint::FilePath) .value_hint(clap::ValueHint::FilePath)
.value_parser(ValueParser::os_string())
.action(ArgAction::Append) .action(ArgAction::Append)
.help( .help(
"read and set variables from a \".env\"-style configuration file \ "read and set variables from a \".env\"-style configuration file \
@ -165,25 +166,172 @@ pub fn uu_app() -> Command {
.long("unset") .long("unset")
.value_name("NAME") .value_name("NAME")
.action(ArgAction::Append) .action(ArgAction::Append)
.value_parser(ValueParser::os_string())
.help("remove variable from the environment"), .help("remove variable from the environment"),
) )
.arg(Arg::new("vars").action(ArgAction::Append)) .arg(
Arg::new("debug")
.short('v')
.long("debug")
.action(ArgAction::SetTrue)
.help("print verbose information for each processing step"),
)
.arg(
Arg::new("split-string") // split string handling is implemented directly, not using CLAP. But this entry here is needed for the help information output.
.short('S')
.long("split-string")
.value_name("S")
.action(ArgAction::Set)
.value_parser(ValueParser::os_string())
.help("process and split S into separate arguments; used to pass multiple arguments on shebang lines")
)
.arg(
Arg::new("vars")
.action(ArgAction::Append)
.value_parser(ValueParser::os_string())
)
}
/// Splits the payload of a `-S`/`--split-string` argument into separate arguments.
///
/// The splitting itself is delegated to `split_iterator::split`. Every parse
/// failure is translated into a `USimpleError` carrying exit code 125 and a
/// human-readable message.
pub fn parse_args_from_str(text: &NativeIntStr) -> UResult<Vec<NativeIntString>> {
    split_iterator::split(text).map_err(|err| {
        // Pick the message first; the exit code is the same for every failure.
        let message: String = match err {
            parse_error::ParseError::MissingClosingQuote { .. } => {
                "no terminating quote in -S string".into()
            }
            parse_error::ParseError::InvalidBackslashAtEndOfStringInMinusS { .. } => {
                "invalid backslash at end of string in -S".into()
            }
            parse_error::ParseError::BackslashCNotAllowedInDoubleQuotes { .. } => {
                "'\\c' must not appear in double-quoted -S string".into()
            }
            parse_error::ParseError::InvalidSequenceBackslashXInMinusS { c, .. } => {
                format!("invalid sequence '\\{}' in -S", c)
            }
            parse_error::ParseError::ParsingOfVariableNameFailed { pos, msg } => {
                format!("variable name issue (at {}): {}", pos, msg)
            }
            // Anything else is reported through its debug representation.
            other => format!("Error: {:?}", other),
        };
        USimpleError::new(125, message)
    })
}
/// Prints the given arguments to stderr: a header line followed by one
/// `arg[i]: <quoted value>` line per argument.
fn debug_print_args(args: &[OsString]) {
    eprintln!("input args:");
    args.iter()
        .enumerate()
        .for_each(|(index, value)| eprintln!("arg[{}]: {}", index, value.quote()));
}
fn check_and_handle_string_args(
arg: &OsString,
prefix_to_test: &str,
all_args: &mut Vec<std::ffi::OsString>,
do_debug_print_args: Option<&Vec<OsString>>,
) -> UResult<bool> {
let native_arg = NCvt::convert(arg);
if let Some(remaining_arg) = native_arg.strip_prefix(&*NCvt::convert(prefix_to_test)) {
if let Some(input_args) = do_debug_print_args {
debug_print_args(input_args); // do it here, such that its also printed when we get an error/panic during parsing
}
let arg_strings = parse_args_from_str(remaining_arg)?;
all_args.extend(
arg_strings
.into_iter()
.map(from_native_int_representation_owned),
);
Ok(true)
} else {
Ok(false)
}
}
/// State collected while the command line of `env` is pre-processed.
#[derive(Default)]
struct EnvAppData {
    /// Set when a `-vS` argument has been processed.
    do_debug_printing: bool,
    /// Set when any split-string argument (`-S`, `-vS`, `--split-string`) has been processed.
    /// While unset, the "No such file or directory" error also prints the `-[v]S` hint.
    had_string_argument: bool,
}
impl EnvAppData {
/// Reports that `prog` could not be found and returns the matching exit code (127).
///
/// If no split-string argument was seen, the hint about `-[v]S` is printed as well.
fn make_error_no_such_file_or_dir(&self, prog: &OsStr) -> Box<dyn UError> {
    uucore::show_error!("{}: No such file or directory", prog.quote());
    let hint_needed = !self.had_string_argument;
    if hint_needed {
        uucore::show_error!("{}", ERROR_MSG_S_SHEBANG);
    }
    ExitCode::new(127)
}
fn process_all_string_arguments(
&mut self,
original_args: &Vec<OsString>,
) -> UResult<Vec<std::ffi::OsString>> {
let mut all_args: Vec<std::ffi::OsString> = Vec::new();
for arg in original_args {
match arg {
b if check_and_handle_string_args(b, "--split-string", &mut all_args, None)? => {
self.had_string_argument = true;
}
b if check_and_handle_string_args(b, "-S", &mut all_args, None)? => {
self.had_string_argument = true;
}
b if check_and_handle_string_args(
b,
"-vS",
&mut all_args,
Some(original_args),
)? =>
{
self.do_debug_printing = true;
self.had_string_argument = true;
}
_ => {
all_args.push(arg.clone());
}
}
}
Ok(all_args)
} }
#[allow(clippy::cognitive_complexity)] #[allow(clippy::cognitive_complexity)]
fn run_env(args: impl uucore::Args) -> UResult<()> { fn run_env(&mut self, original_args: impl uucore::Args) -> UResult<()> {
let original_args: Vec<OsString> = original_args.collect();
let args = self.process_all_string_arguments(&original_args)?;
let app = uu_app(); let app = uu_app();
let matches = app.try_get_matches_from(args).with_exit_code(125)?; let matches = app
.try_get_matches_from(args)
.map_err(|e| -> Box<dyn UError> {
match e.kind() {
clap::error::ErrorKind::DisplayHelp
| clap::error::ErrorKind::DisplayVersion => e.into(),
_ => {
// extend any real issue with parameter parsing by the ERROR_MSG_S_SHEBANG
let s = format!("{}", e);
if !s.is_empty() {
let s = s.trim_end();
uucore::show_error!("{}", s);
}
uucore::show_error!("{}", ERROR_MSG_S_SHEBANG);
uucore::error::ExitCode::new(125)
}
}
})?;
let did_debug_printing_before = self.do_debug_printing; // could have been done already as part of the "-vS" string parsing
let do_debug_printing = self.do_debug_printing || matches.get_flag("debug");
if do_debug_printing && !did_debug_printing_before {
debug_print_args(&original_args);
}
let ignore_env = matches.get_flag("ignore-environment"); let ignore_env = matches.get_flag("ignore-environment");
let line_ending = LineEnding::from_zero_flag(matches.get_flag("null")); let line_ending = LineEnding::from_zero_flag(matches.get_flag("null"));
let running_directory = matches.get_one::<String>("chdir").map(|s| s.as_str()); let running_directory = matches.get_one::<OsString>("chdir").map(|s| s.as_os_str());
let files = match matches.get_many::<String>("file") { let files = match matches.get_many::<OsString>("file") {
Some(v) => v.map(|s| s.as_str()).collect(), Some(v) => v.map(|s| s.as_os_str()).collect(),
None => Vec::with_capacity(0), None => Vec::with_capacity(0),
}; };
let unsets = match matches.get_many::<String>("unset") { let unsets = match matches.get_many::<OsString>("unset") {
Some(v) => v.map(|s| s.as_str()).collect(), Some(v) => v.map(|s| s.as_os_str()).collect(),
None => Vec::with_capacity(0), None => Vec::with_capacity(0),
}; };
@ -204,14 +352,14 @@ fn run_env(args: impl uucore::Args) -> UResult<()> {
Err(error) => { Err(error) => {
return Err(USimpleError::new( return Err(USimpleError::new(
125, 125,
format!("cannot change directory to \"{d}\": {error}"), format!("cannot change directory to {}: {error}", d.quote()),
)); ));
} }
}; };
} }
let mut begin_prog_opts = false; let mut begin_prog_opts = false;
if let Some(mut iter) = matches.get_many::<String>("vars") { if let Some(mut iter) = matches.get_many::<OsString>("vars") {
// read NAME=VALUE arguments (and up to a single program argument) // read NAME=VALUE arguments (and up to a single program argument)
while !begin_prog_opts { while !begin_prog_opts {
if let Some(opt) = iter.next() { if let Some(opt) = iter.next() {
@ -244,7 +392,7 @@ fn run_env(args: impl uucore::Args) -> UResult<()> {
// remove all env vars if told to ignore presets // remove all env vars if told to ignore presets
if opts.ignore_env { if opts.ignore_env {
for (ref name, _) in env::vars() { for (ref name, _) in env::vars_os() {
env::remove_var(name); env::remove_var(name);
} }
} }
@ -254,7 +402,11 @@ fn run_env(args: impl uucore::Args) -> UResult<()> {
// unset specified env vars // unset specified env vars
for name in &opts.unsets { for name in &opts.unsets {
if name.is_empty() || name.contains(0 as char) || name.contains('=') { let native_name = NativeStr::new(name);
if name.is_empty()
|| native_name.contains(&'\0').unwrap()
|| native_name.contains(&'=').unwrap()
{
return Err(USimpleError::new( return Err(USimpleError::new(
125, 125,
format!("cannot unset {}: Invalid argument", name.quote()), format!("cannot unset {}: Invalid argument", name.quote()),
@ -265,7 +417,7 @@ fn run_env(args: impl uucore::Args) -> UResult<()> {
} }
// set specified env vars // set specified env vars
for &(name, val) in &opts.sets { for (name, val) in &opts.sets {
/* /*
* set_var panics if name is an empty string * set_var panics if name is an empty string
* set_var internally calls setenv (on unix at least), while GNU env calls putenv instead. * set_var internally calls setenv (on unix at least), while GNU env calls putenv instead.
@ -300,10 +452,15 @@ fn run_env(args: impl uucore::Args) -> UResult<()> {
print_env(opts.line_ending); print_env(opts.line_ending);
} else { } else {
// we need to execute a command // we need to execute a command
#[cfg(windows)] let prog = Cow::from(opts.program[0]);
let (prog, args) = build_command(&mut opts.program); let args = &opts.program[1..];
#[cfg(not(windows))]
let (prog, args) = build_command(&opts.program); if do_debug_printing {
eprintln!("executable: {}", prog.quote());
for (i, arg) in args.iter().enumerate() {
eprintln!("arg[{}]: {}", i, arg.quote());
}
}
/* /*
* On Unix-like systems Command::status either ends up calling either fork or posix_spawnp * On Unix-like systems Command::status either ends up calling either fork or posix_spawnp
@ -333,7 +490,11 @@ fn run_env(args: impl uucore::Args) -> UResult<()> {
let _ = unsafe { let _ = unsafe {
sigaction( sigaction(
signal, signal,
&SigAction::new(SigHandler::SigDfl, SaFlags::empty(), SigSet::all()), &SigAction::new(
SigHandler::SigDfl,
SaFlags::empty(),
SigSet::all(),
),
) )
}; };
@ -342,16 +503,25 @@ fn run_env(args: impl uucore::Args) -> UResult<()> {
#[cfg(not(unix))] #[cfg(not(unix))]
return Err(exit.code().unwrap().into()); return Err(exit.code().unwrap().into());
} }
Err(ref err) if err.kind() == io::ErrorKind::NotFound => return Err(127.into()), Err(ref err)
Err(_) => return Err(126.into()), if (err.kind() == io::ErrorKind::NotFound)
|| (err.kind() == io::ErrorKind::InvalidInput) =>
{
return Err(self.make_error_no_such_file_or_dir(prog.deref()));
}
Err(e) => {
uucore::show_error!("unknown error: {:?}", e);
return Err(126.into());
}
Ok(_) => (), Ok(_) => (),
} }
} }
Ok(()) Ok(())
} }
}
#[uucore::main] #[uucore::main]
pub fn uumain(args: impl uucore::Args) -> UResult<()> { pub fn uumain(args: impl uucore::Args) -> UResult<()> {
run_env(args) EnvAppData::default().run_env(args)
} }

325
src/uu/env/src/native_int_str.rs vendored Normal file
View file

@ -0,0 +1,325 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
// This module contains classes and functions for dealing with the differences
// between operating systems regarding the lossless processing of OsStr/OsString.
// In contrast to existing crates with similar purpose, this module does not use any
// `unsafe` features or functions.
// Due to a suboptimal design aspect of OsStr/OsString on windows, we need to
// encode/decode to wide chars on windows operating system.
// This prevents borrowing from OsStr on windows. Anyway, if optimally used,
// this conversion needs to be done only once in the beginning and at the end.
use std::ffi::OsString;
#[cfg(not(target_os = "windows"))]
use std::os::unix::ffi::{OsStrExt, OsStringExt};
#[cfg(target_os = "windows")]
use std::os::windows::prelude::*;
use std::{borrow::Cow, ffi::OsStr};
#[cfg(target_os = "windows")]
use u16 as NativeIntCharU;
#[cfg(not(target_os = "windows"))]
use u8 as NativeIntCharU;
pub type NativeCharInt = NativeIntCharU;
pub type NativeIntStr = [NativeCharInt];
pub type NativeIntString = Vec<NativeCharInt>;
/// Unit type that carries the `Convert` implementations of this module.
pub struct NCvt;
/// Conversion of a value of type `From` into a value of type `To`.
pub trait Convert<From, To> {
    /// Converts `f` into the target type.
    fn convert(f: From) -> To;
}
// ================ str/String =================
impl<'a> Convert<&'a str, Cow<'a, NativeIntStr>> for NCvt {
    /// Converts a string slice to its native integer representation:
    /// UTF-16 units (owned) on windows, the borrowed UTF-8 bytes elsewhere.
    fn convert(f: &'a str) -> Cow<'a, NativeIntStr> {
        #[cfg(target_os = "windows")]
        let native: Cow<'a, NativeIntStr> = Cow::Owned(f.encode_utf16().collect());
        #[cfg(not(target_os = "windows"))]
        let native: Cow<'a, NativeIntStr> = Cow::Borrowed(f.as_bytes());
        native
    }
}
impl<'a> Convert<&'a String, Cow<'a, NativeIntStr>> for NCvt {
    /// Converts a borrowed `String` to its native integer representation:
    /// UTF-16 units (owned) on windows, the borrowed UTF-8 bytes elsewhere.
    fn convert(f: &'a String) -> Cow<'a, NativeIntStr> {
        let text: &'a str = f.as_str();
        #[cfg(target_os = "windows")]
        let native: Cow<'a, NativeIntStr> = Cow::Owned(text.encode_utf16().collect());
        #[cfg(not(target_os = "windows"))]
        let native: Cow<'a, NativeIntStr> = Cow::Borrowed(text.as_bytes());
        native
    }
}
impl<'a> Convert<String, Cow<'a, NativeIntStr>> for NCvt {
    /// Converts an owned `String` to an owned native integer representation:
    /// UTF-16 units on windows, the string's own UTF-8 byte buffer elsewhere.
    fn convert(f: String) -> Cow<'a, NativeIntStr> {
        #[cfg(target_os = "windows")]
        let native: Cow<'a, NativeIntStr> = Cow::Owned(f.encode_utf16().collect());
        #[cfg(not(target_os = "windows"))]
        let native: Cow<'a, NativeIntStr> = Cow::Owned(f.into_bytes());
        native
    }
}
// ================ OsStr/OsString =================
impl<'a> Convert<&'a OsStr, Cow<'a, NativeIntStr>> for NCvt {
    /// Converts an `OsStr` by delegating to `to_native_int_representation`.
    fn convert(f: &'a OsStr) -> Cow<'a, NativeIntStr> {
        to_native_int_representation(f)
    }
}
impl<'a> Convert<&'a OsString, Cow<'a, NativeIntStr>> for NCvt {
fn convert(f: &'a OsString) -> Cow<'a, NativeIntStr> {
to_native_int_representation(f)
}
}
impl<'a> Convert<OsString, Cow<'a, NativeIntStr>> for NCvt {
    /// Converts an owned `OsString` to an owned native integer representation:
    /// wide chars on windows, the underlying byte vector elsewhere.
    fn convert(f: OsString) -> Cow<'a, NativeIntStr> {
        #[cfg(target_os = "windows")]
        let native: Cow<'a, NativeIntStr> = Cow::Owned(f.encode_wide().collect());
        #[cfg(not(target_os = "windows"))]
        let native: Cow<'a, NativeIntStr> = Cow::Owned(f.into_vec());
        native
    }
}
// ================ Vec<Str/String> =================
impl<'a> Convert<&'a Vec<&'a str>, Vec<Cow<'a, NativeIntStr>>> for NCvt {
    /// Converts every string slice of the borrowed vector, preserving order.
    fn convert(f: &'a Vec<&'a str>) -> Vec<Cow<'a, NativeIntStr>> {
        let mut converted: Vec<Cow<'a, NativeIntStr>> = Vec::with_capacity(f.len());
        for text in f {
            converted.push(Self::convert(*text));
        }
        converted
    }
}
impl<'a> Convert<Vec<&'a str>, Vec<Cow<'a, NativeIntStr>>> for NCvt {
    /// Converts every string slice of the vector, preserving order.
    fn convert(f: Vec<&'a str>) -> Vec<Cow<'a, NativeIntStr>> {
        let mut converted: Vec<Cow<'a, NativeIntStr>> = Vec::with_capacity(f.len());
        for text in f {
            converted.push(Self::convert(text));
        }
        converted
    }
}
impl<'a> Convert<&'a Vec<String>, Vec<Cow<'a, NativeIntStr>>> for NCvt {
    /// Converts every `String` of the borrowed vector, preserving order.
    fn convert(f: &'a Vec<String>) -> Vec<Cow<'a, NativeIntStr>> {
        let mut converted: Vec<Cow<'a, NativeIntStr>> = Vec::with_capacity(f.len());
        for text in f {
            converted.push(Self::convert(text));
        }
        converted
    }
}
impl<'a> Convert<Vec<String>, Vec<Cow<'a, NativeIntStr>>> for NCvt {
    /// Converts every `String` of the vector into an owned native representation,
    /// preserving order.
    fn convert(f: Vec<String>) -> Vec<Cow<'a, NativeIntStr>> {
        let mut converted: Vec<Cow<'a, NativeIntStr>> = Vec::with_capacity(f.len());
        for text in f {
            converted.push(Self::convert(text));
        }
        converted
    }
}
pub fn to_native_int_representation(input: &OsStr) -> Cow<'_, NativeIntStr> {
#[cfg(target_os = "windows")]
{
Cow::Owned(input.encode_wide().collect())
}
#[cfg(not(target_os = "windows"))]
{
Cow::Borrowed(input.as_bytes())
}
}
/// Turns a native integer representation back into an `OsStr`.
///
/// On windows the result is always owned. Elsewhere a borrowed input stays
/// borrowed and an owned input stays owned.
#[allow(clippy::needless_pass_by_value)] // needed on windows
pub fn from_native_int_representation(input: Cow<'_, NativeIntStr>) -> Cow<'_, OsStr> {
    #[cfg(target_os = "windows")]
    {
        Cow::Owned(OsString::from_wide(&input))
    }
    #[cfg(not(target_os = "windows"))]
    {
        match input {
            Cow::Owned(bytes) => Cow::Owned(OsString::from_vec(bytes)),
            Cow::Borrowed(bytes) => Cow::Borrowed(OsStr::from_bytes(bytes)),
        }
    }
}
/// Turns an owned native integer representation into an `OsString`.
#[allow(clippy::needless_pass_by_value)] // needed on windows
pub fn from_native_int_representation_owned(input: NativeIntString) -> OsString {
    #[cfg(target_os = "windows")]
    let os_string = OsString::from_wide(&input);
    #[cfg(not(target_os = "windows"))]
    let os_string = OsString::from_vec(input);
    os_string
}
/// Returns the native integer value of `c` if the character is encoded as
/// exactly one native unit (one UTF-16 unit on windows, one UTF-8 byte
/// elsewhere); otherwise returns `None`.
pub fn get_single_native_int_value(c: &char) -> Option<NativeCharInt> {
    #[cfg(target_os = "windows")]
    {
        let mut units = [0u16; 2];
        match c.encode_utf16(&mut units) {
            [single] => Some(*single),
            _ => None,
        }
    }
    #[cfg(not(target_os = "windows"))]
    {
        let mut bytes = [0u8; 4];
        match c.encode_utf8(&mut bytes).as_bytes() {
            [single] => Some(*single),
            _ => None,
        }
    }
}
/// Decodes a single native unit into a character.
///
/// Returns the decoded character together with the unchanged input value, or
/// `None` if the unit on its own does not form a valid character.
pub fn get_char_from_native_int(ni: NativeCharInt) -> Option<(char, NativeCharInt)> {
    #[cfg(target_os = "windows")]
    let decoded: Option<char> = char::decode_utf16([ni]).next().and_then(Result::ok);
    #[cfg(not(target_os = "windows"))]
    let decoded: Option<char> = std::str::from_utf8(&[ni])
        .ok()
        .and_then(|text| text.chars().next());

    decoded.map(|c| (c, ni))
}
/// Wrapper around the native integer representation of an `OsStr`.
pub struct NativeStr<'a> {
    /// The wrapped representation, as produced by `to_native_int_representation`.
    native: Cow<'a, NativeIntStr>,
}
impl<'a> NativeStr<'a> {
    /// Wraps `str`, converting it with `to_native_int_representation`.
    pub fn new(str: &'a OsStr) -> Self {
        Self {
            native: to_native_int_representation(str),
        }
    }
    /// Returns a clone of the wrapped native representation.
    pub fn native(&self) -> Cow<'a, NativeIntStr> {
        self.native.clone()
    }
    /// Consumes the wrapper and returns the native representation.
    pub fn into_native(self) -> Cow<'a, NativeIntStr> {
        self.native
    }
    /// Tests whether the string contains the character `x`.
    ///
    /// Returns `None` if `x` has no single native integer value
    /// (see `get_single_native_int_value`).
    pub fn contains(&self, x: &char) -> Option<bool> {
        let n_c = get_single_native_int_value(x)?;
        Some(self.native.contains(&n_c))
    }
    /// Returns the native units `from..to` as an `OsStr`.
    ///
    /// The range is indexed directly, so an out-of-bounds range panics.
    pub fn slice(&self, from: usize, to: usize) -> Cow<'a, OsStr> {
        let result = self.match_cow(|b| Ok::<_, ()>(&b[from..to]), |o| Ok(o[from..to].to_vec()));
        result.unwrap()
    }
    /// Splits the string at the first occurrence of `pred`.
    ///
    /// Returns the parts before and after the delimiter (the delimiter itself is
    /// dropped), or `None` if `pred` has no single native integer value or does
    /// not occur.
    pub fn split_once(&self, pred: &char) -> Option<(Cow<'a, OsStr>, Cow<'a, OsStr>)> {
        let n_c = get_single_native_int_value(pred)?;
        let p = self.native.iter().position(|&x| x == n_c)?;
        let before = self.slice(0, p);
        let after = self.slice(p + 1, self.native.len());
        Some((before, after))
    }
    /// Splits the string into the native units before `pos` and those from `pos` on.
    pub fn split_at(&self, pos: usize) -> (Cow<'a, OsStr>, Cow<'a, OsStr>) {
        let before = self.slice(0, pos);
        let after = self.slice(pos, self.native.len());
        (before, after)
    }
    /// Returns the remainder after `prefix` as an `OsStr`, or `None` if the
    /// string does not start with `prefix`.
    pub fn strip_prefix(&self, prefix: &OsStr) -> Option<Cow<'a, OsStr>> {
        let n_prefix = to_native_int_representation(prefix);
        let result = self.match_cow(
            |b| b.strip_prefix(&*n_prefix).ok_or(()),
            |o| o.strip_prefix(&*n_prefix).map(|x| x.to_vec()).ok_or(()),
        );
        result.ok()
    }
    /// Same as `strip_prefix`, but returns the remainder in native representation.
    pub fn strip_prefix_native(&self, prefix: &OsStr) -> Option<Cow<'a, NativeIntStr>> {
        let n_prefix = to_native_int_representation(prefix);
        let result = self.match_cow_native(
            |b| b.strip_prefix(&*n_prefix).ok_or(()),
            |o| o.strip_prefix(&*n_prefix).map(|x| x.to_vec()).ok_or(()),
        );
        result.ok()
    }
    /// Applies `f_borrow` if the wrapped data is borrowed, `f_owned` if it is
    /// owned, and converts the successful result to an `OsStr` of the same kind.
    fn match_cow<FnBorrow, FnOwned, Err>(
        &self,
        f_borrow: FnBorrow,
        f_owned: FnOwned,
    ) -> Result<Cow<'a, OsStr>, Err>
    where
        FnBorrow: FnOnce(&'a [NativeCharInt]) -> Result<&'a [NativeCharInt], Err>,
        FnOwned: FnOnce(&Vec<NativeCharInt>) -> Result<Vec<NativeCharInt>, Err>,
    {
        match &self.native {
            Cow::Borrowed(b) => {
                let slice = f_borrow(b);
                let os_str = slice.map(|x| from_native_int_representation(Cow::Borrowed(x)));
                os_str
            }
            Cow::Owned(o) => {
                let slice = f_owned(o);
                let os_str = slice.map(from_native_int_representation_owned);
                os_str.map(Cow::Owned)
            }
        }
    }
    /// Like `match_cow`, but the successful result stays in native representation.
    fn match_cow_native<FnBorrow, FnOwned, Err>(
        &self,
        f_borrow: FnBorrow,
        f_owned: FnOwned,
    ) -> Result<Cow<'a, NativeIntStr>, Err>
    where
        FnBorrow: FnOnce(&'a [NativeCharInt]) -> Result<&'a [NativeCharInt], Err>,
        FnOwned: FnOnce(&Vec<NativeCharInt>) -> Result<Vec<NativeCharInt>, Err>,
    {
        match &self.native {
            Cow::Borrowed(b) => {
                let slice = f_borrow(b);
                slice.map(Cow::Borrowed)
            }
            Cow::Owned(o) => {
                let slice = f_owned(o);
                slice.map(Cow::Owned)
            }
        }
    }
}

55
src/uu/env/src/parse_error.rs vendored Normal file
View file

@ -0,0 +1,55 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
use std::fmt;
use crate::string_parser;
/// An error returned when string arg splitting fails.
///
/// The variants `ReachedEnd` and `ContinueWithDelimiter` are not failures: the
/// split iterator's `state_root` uses them as control-flow signals.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ParseError {
    /// A quoted section was not closed.
    MissingClosingQuote {
        pos: usize,
        c: char,
    },
    /// The string ended directly after a backslash; `quoting` names the
    /// parser state in which that happened.
    InvalidBackslashAtEndOfStringInMinusS {
        pos: usize,
        quoting: String,
    },
    /// `\c` appeared inside a double-quoted section.
    BackslashCNotAllowedInDoubleQuotes {
        pos: usize,
    },
    /// A backslash was followed by the unsupported character `c`.
    InvalidSequenceBackslashXInMinusS {
        pos: usize,
        c: char,
    },
    /// A variable name could not be parsed; `msg` describes the problem.
    ParsingOfVariableNameFailed {
        pos: usize,
        msg: String,
    },
    /// Wraps an error coming from the underlying string parser.
    InternalError {
        pos: usize,
        sub_err: string_parser::Error,
    },
    /// Control-flow signal: parsing is finished.
    ReachedEnd,
    /// Control-flow signal: continue parsing in the delimiter state.
    ContinueWithDelimiter,
}
impl fmt::Display for ParseError {
    /// Renders the error through its `Debug` representation.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{:?}", self)
    }
}
// Marker impl only: all methods of `std::error::Error` keep their defaults.
impl std::error::Error for ParseError {}
impl From<string_parser::Error> for ParseError {
    /// Wraps a string parser error as `InternalError`, taking over its peek position.
    fn from(value: string_parser::Error) -> Self {
        let pos = value.peek_position;
        Self::InternalError {
            pos,
            sub_err: value,
        }
    }
}

375
src/uu/env/src/split_iterator.rs vendored Normal file
View file

@ -0,0 +1,375 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
//
// This file is based on work from Tomasz Miąsko who published it as "shell_words" crate,
// licensed under the Apache License, Version 2.0 <LICENSE-APACHE>
// or the MIT license <LICENSE-MIT>, at your option.
//
//! Process command line according to parsing rules of original GNU env.
//! Even though it looks quite like a POSIX syntax, the original
//! "shell_words" implementation had to be adapted significantly.
//!
//! Apart from the grammar differences, there is a new feature integrated: $VARIABLE expansion.
//!
//! [GNU env] <https://www.gnu.org/software/coreutils/manual/html_node/env-invocation.html#g_t_002dS_002f_002d_002dsplit_002dstring-syntax>
// spell-checker:ignore (words) Tomasz Miąsko rntfv FFFD varname
#![forbid(unsafe_code)]
use std::borrow::Cow;
use crate::native_int_str::from_native_int_representation;
use crate::native_int_str::NativeCharInt;
use crate::native_int_str::NativeIntStr;
use crate::native_int_str::NativeIntString;
use crate::parse_error::ParseError;
use crate::string_expander::StringExpander;
use crate::string_parser::StringParser;
use crate::variable_parser::VariableParser;
// Characters with a special meaning for the state machine below.
const BACKSLASH: char = '\\';
const DOUBLE_QUOTES: char = '\"';
const SINGLE_QUOTES: char = '\'';
const NEW_LINE: char = '\n';
const DOLLAR: char = '$';
// Escape table used by `check_and_replace_ascii_escape_code`:
// (character looked up, character pushed to the current word instead).
const REPLACEMENTS: [(char, char); 9] = [
    ('r', '\r'),
    ('n', '\n'),
    ('t', '\t'),
    ('f', '\x0C'),
    ('v', '\x0B'),
    ('_', ' '),
    ('#', '#'),
    ('$', '$'),
    ('"', '"'),
];
// Characters that separate words: skipped in the delimiter state and
// terminating the current word in the unquoted state.
const ASCII_WHITESPACE_CHARS: [char; 6] = [' ', '\t', '\r', '\n', '\x0B', '\x0C'];
/// State machine that splits a `-S` string into words.
pub struct SplitIterator<'a> {
    /// Reads the input and collects the output of the word currently being built.
    expander: StringExpander<'a>,
    /// The completed words, in the order they were finished.
    words: Vec<Vec<NativeCharInt>>,
}
impl<'a> SplitIterator<'a> {
/// Creates a splitter over `s` with an empty list of words.
pub fn new(s: &'a NativeIntStr) -> Self {
    Self {
        expander: StringExpander::new(s),
        words: Vec::new(),
    }
}
/// Consumes the current input element without adding it to the current word.
fn skip_one(&mut self) -> Result<(), ParseError> {
    let parser = self.expander.get_parser_mut();
    parser.consume_one_ascii_or_all_non_ascii()?;
    Ok(())
}
/// Forwards to the expander's `take_one`, converting its error into a `ParseError`.
fn take_one(&mut self) -> Result<(), ParseError> {
    self.expander.take_one()?;
    Ok(())
}
/// Returns the character the expander currently peeks at, or `None` if peeking fails.
// NOTE(review): presumably peeking fails at the end of the input — confirm in StringExpander.
fn get_current_char(&self) -> Option<char> {
    self.expander.peek().ok()
}
/// Hands the character `c` to the expander (`put_one_char`).
fn push_char_to_word(&mut self, c: char) {
    self.expander.put_one_char(c);
}
/// Takes the output collected by the expander and stores it as the next word.
fn push_word_to_words(&mut self) {
    let finished_word = self.expander.take_collected_output();
    self.words.push(finished_word);
}
/// Shared access to the parser owned by the expander.
fn get_parser(&self) -> &StringParser<'a> {
    self.expander.get_parser()
}
/// Mutable access to the parser owned by the expander.
fn get_parser_mut(&mut self) -> &mut StringParser<'a> {
    self.expander.get_parser_mut()
}
/// Parses a variable reference at the current position and appends its
/// expansion to the current word.
///
/// The value of the environment variable is used if it is set. Otherwise the
/// default returned by the variable parser is used, if there is one. If
/// neither exists, nothing is appended.
///
/// # Errors
///
/// Returns the error of the variable parser if the reference cannot be parsed.
fn substitute_variable(&mut self) -> Result<(), ParseError> {
    let mut var_parse = VariableParser::<'a, '_> {
        parser: self.get_parser_mut(),
    };

    let (name, default) = var_parse.parse_variable()?;

    let varname_os_str_cow = from_native_int_representation(Cow::Borrowed(name));
    let value = std::env::var_os(varname_os_str_cow);
    match (&value, default) {
        // The environment value takes precedence over any default.
        (Some(value), _) => {
            self.expander.put_string(value);
        }
        (None, Some(default)) => {
            self.expander.put_native_string(default);
        }
        // Neither a value nor a default: the reference expands to "".
        (None, None) => {}
    };

    Ok(())
}
/// Looks up `c` in `REPLACEMENTS`. On a hit, the current input element is
/// skipped, the replacement character is pushed to the current word and
/// `Ok(true)` is returned. Otherwise nothing is consumed and `Ok(false)` is returned.
fn check_and_replace_ascii_escape_code(&mut self, c: char) -> Result<bool, ParseError> {
    match REPLACEMENTS.iter().find(|entry| entry.0 == c) {
        Some(&(_, replacement)) => {
            self.skip_one()?;
            self.push_char_to_word(replacement);
            Ok(true)
        }
        None => Ok(false),
    }
}
/// Builds the error for an unsupported `\<c>` sequence at the current peek position.
fn make_invalid_sequence_backslash_xin_minus_s(&self, c: char) -> ParseError {
    let pos = self.expander.get_parser().get_peek_position();
    ParseError::InvalidSequenceBackslashXInMinusS { pos, c }
}
/// Runs the delimiter state until parsing is finished.
///
/// `ContinueWithDelimiter` restarts the delimiter state and `ReachedEnd`
/// counts as success. Every other result is returned unchanged.
fn state_root(&mut self) -> Result<(), ParseError> {
    loop {
        match self.state_delimiter() {
            Err(ParseError::ContinueWithDelimiter) => continue,
            Ok(()) | Err(ParseError::ReachedEnd) => return Ok(()),
            Err(other) => return Err(other),
        }
    }
}
/// Delimiter state: skips whitespace between words and dispatches to the
/// comment, backslash and unquoted states. Returns `Ok(())` once no current
/// character is available any more.
fn state_delimiter(&mut self) -> Result<(), ParseError> {
    while let Some(current) = self.get_current_char() {
        match current {
            '#' => {
                self.skip_one()?;
                self.state_comment()?;
            }
            BACKSLASH => {
                self.skip_one()?;
                self.state_delimiter_backslash()?;
            }
            c if ASCII_WHITESPACE_CHARS.contains(&c) => {
                self.skip_one()?;
            }
            _ => {
                // Don't consume char. Will be done in unquoted state.
                self.state_unquoted()?;
            }
        }
    }
    Ok(())
}
/// Handles the character after a backslash that was met in delimiter state.
///
/// * end of input: error, a trailing backslash is invalid
/// * `_` or newline: skipped, still delimiting
/// * `$`, `\`, `#`, `'`, `"`: taken literally as the start of an unquoted word
/// * `c`: stops parsing (`ReachedEnd`)
/// * known escape code: replaced, then continues as an unquoted word
/// * anything else: invalid-sequence error
fn state_delimiter_backslash(&mut self) -> Result<(), ParseError> {
    let c = match self.get_current_char() {
        Some(c) => c,
        None => {
            return Err(ParseError::InvalidBackslashAtEndOfStringInMinusS {
                pos: self.get_parser().get_peek_position(),
                quoting: "Delimiter".into(),
            })
        }
    };
    match c {
        '_' | NEW_LINE => {
            self.skip_one()?;
            Ok(())
        }
        DOLLAR | BACKSLASH | '#' | SINGLE_QUOTES | DOUBLE_QUOTES => {
            self.take_one()?;
            self.state_unquoted()
        }
        'c' => Err(ParseError::ReachedEnd),
        _ if self.check_and_replace_ascii_escape_code(c)? => self.state_unquoted(),
        _ => Err(self.make_invalid_sequence_backslash_xin_minus_s(c)),
    }
}
/// Unquoted state: collects one word outside of any quotes.
///
/// Variable references are expanded, quotes and backslashes switch to their
/// own states, and ASCII whitespace finishes the word and returns `Ok(())`.
/// At end of input the word is finished and `ReachedEnd` is returned.
fn state_unquoted(&mut self) -> Result<(), ParseError> {
    loop {
        let c = match self.get_current_char() {
            Some(c) => c,
            None => {
                self.push_word_to_words();
                return Err(ParseError::ReachedEnd);
            }
        };
        match c {
            DOLLAR => self.substitute_variable()?,
            SINGLE_QUOTES => {
                self.skip_one()?;
                self.state_single_quoted()?;
            }
            DOUBLE_QUOTES => {
                self.skip_one()?;
                self.state_double_quoted()?;
            }
            BACKSLASH => {
                self.skip_one()?;
                self.state_unquoted_backslash()?;
            }
            _ if ASCII_WHITESPACE_CHARS.contains(&c) => {
                self.push_word_to_words();
                self.skip_one()?;
                return Ok(());
            }
            _ => {
                self.take_one()?;
            }
        }
    }
}
/// Handles the character after a backslash inside an unquoted word.
///
/// * end of input: error, a trailing backslash is invalid
/// * newline: skipped (line continuation)
/// * `_`: finishes the word and asks the root to continue delimiting
/// * `c`: finishes the word and stops parsing
/// * `$`, `\`, `'`, `"`: taken literally
/// * known escape code: replaced
/// * anything else: invalid-sequence error
fn state_unquoted_backslash(&mut self) -> Result<(), ParseError> {
    let c = match self.get_current_char() {
        Some(c) => c,
        None => {
            return Err(ParseError::InvalidBackslashAtEndOfStringInMinusS {
                pos: self.get_parser().get_peek_position(),
                quoting: "Unquoted".into(),
            })
        }
    };
    match c {
        NEW_LINE => {
            self.skip_one()?;
            Ok(())
        }
        '_' => {
            self.skip_one()?;
            self.push_word_to_words();
            Err(ParseError::ContinueWithDelimiter)
        }
        'c' => {
            self.push_word_to_words();
            Err(ParseError::ReachedEnd)
        }
        DOLLAR | BACKSLASH | SINGLE_QUOTES | DOUBLE_QUOTES => {
            self.take_one()?;
            Ok(())
        }
        _ if self.check_and_replace_ascii_escape_code(c)? => Ok(()),
        _ => Err(self.make_invalid_sequence_backslash_xin_minus_s(c)),
    }
}
/// Single-quoted state: copies characters verbatim until the closing `'`.
///
/// A backslash is delegated to `split_single_quoted_backslash`. Running out
/// of input before the closing quote yields `MissingClosingQuote`.
fn state_single_quoted(&mut self) -> Result<(), ParseError> {
    while let Some(c) = self.get_current_char() {
        match c {
            SINGLE_QUOTES => {
                self.skip_one()?;
                return Ok(());
            }
            BACKSLASH => {
                self.skip_one()?;
                self.split_single_quoted_backslash()?;
            }
            _ => {
                self.take_one()?;
            }
        }
    }
    Err(ParseError::MissingClosingQuote {
        pos: self.get_parser().get_peek_position(),
        c: '\'',
    })
}
/// Handles the character after a backslash inside single quotes.
///
/// * end of input: the closing quote is missing
/// * newline: skipped (line continuation)
/// * `'` or `\`: taken literally
/// * known escape code: kept unexpanded, backslash included
/// * anything else: invalid-sequence error
fn split_single_quoted_backslash(&mut self) -> Result<(), ParseError> {
    let c = match self.get_current_char() {
        Some(c) => c,
        None => {
            return Err(ParseError::MissingClosingQuote {
                pos: self.get_parser().get_peek_position(),
                c: '\'',
            })
        }
    };
    match c {
        NEW_LINE => {
            self.skip_one()?;
            Ok(())
        }
        SINGLE_QUOTES | BACKSLASH => {
            self.take_one()?;
            Ok(())
        }
        _ if REPLACEMENTS.iter().any(|&x| x.0 == c) => {
            // See GNU test-suite e11: inside single quotes, \t stays as it is.
            // Compared with GNU behavior: \a is not accepted and raises an error.
            // So apparently only known sequences are allowed, even though they
            // are not expanded ... possibly a GNU bug?
            self.push_char_to_word(BACKSLASH);
            self.take_one()?;
            Ok(())
        }
        _ => Err(self.make_invalid_sequence_backslash_xin_minus_s(c)),
    }
}
/// Double-quoted state: copies characters until the closing `"`.
///
/// Variable references are expanded and backslashes are delegated to
/// `state_double_quoted_backslash`. Running out of input before the closing
/// quote yields `MissingClosingQuote`.
fn state_double_quoted(&mut self) -> Result<(), ParseError> {
    while let Some(c) = self.get_current_char() {
        match c {
            DOLLAR => self.substitute_variable()?,
            DOUBLE_QUOTES => {
                self.skip_one()?;
                return Ok(());
            }
            BACKSLASH => {
                self.skip_one()?;
                self.state_double_quoted_backslash()?;
            }
            _ => {
                self.take_one()?;
            }
        }
    }
    Err(ParseError::MissingClosingQuote {
        pos: self.get_parser().get_peek_position(),
        c: '"',
    })
}
/// Handles the character after a backslash inside double quotes.
///
/// * end of input: the closing quote is missing
/// * newline: skipped (line continuation)
/// * `"`, `$`, `\`: taken literally
/// * `c`: rejected, `\c` is not allowed inside double quotes
/// * known escape code: replaced
/// * anything else: invalid-sequence error
fn state_double_quoted_backslash(&mut self) -> Result<(), ParseError> {
    let c = match self.get_current_char() {
        Some(c) => c,
        None => {
            return Err(ParseError::MissingClosingQuote {
                pos: self.get_parser().get_peek_position(),
                c: '"',
            })
        }
    };
    match c {
        NEW_LINE => {
            self.skip_one()?;
            Ok(())
        }
        DOUBLE_QUOTES | DOLLAR | BACKSLASH => {
            self.take_one()?;
            Ok(())
        }
        'c' => Err(ParseError::BackslashCNotAllowedInDoubleQuotes {
            pos: self.get_parser().get_peek_position(),
        }),
        _ if self.check_and_replace_ascii_escape_code(c)? => Ok(()),
        _ => Err(self.make_invalid_sequence_backslash_xin_minus_s(c)),
    }
}
/// Comment state: discards input up to and including the next newline.
///
/// Returns `Ok(())` after consuming the newline, or `ReachedEnd` when the
/// input ends inside the comment.
fn state_comment(&mut self) -> Result<(), ParseError> {
    while let Some(c) = self.get_current_char() {
        if c == NEW_LINE {
            self.skip_one()?;
            return Ok(());
        }
        // Jump straight to the newline (or to the end if there is none).
        self.get_parser_mut().skip_until_char_or_end(NEW_LINE);
    }
    Err(ParseError::ReachedEnd)
}
/// Runs the state machine over the whole input and returns the collected
/// words, or the first parse error.
pub fn split(mut self) -> Result<Vec<NativeIntString>, ParseError> {
    match self.state_root() {
        Ok(()) => Ok(self.words),
        Err(err) => Err(err),
    }
}
}
/// Splits `s` into words by running a fresh `SplitIterator` over it.
///
/// # Errors
/// Returns the `ParseError` reported by the iterator.
pub fn split(s: &NativeIntStr) -> Result<Vec<NativeIntString>, ParseError> {
    SplitIterator::new(s).split()
}

92
src/uu/env/src/string_expander.rs vendored Normal file
View file

@ -0,0 +1,92 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
use std::{
ffi::{OsStr, OsString},
mem,
ops::Deref,
};
use crate::{
native_int_str::{to_native_int_representation, NativeCharInt, NativeIntStr},
string_parser::{Chunk, Error, StringParser},
};
/// This struct makes parsing and word collection more convenient.
///
/// It wraps a [`StringParser`] and manages an "output" buffer that is filled
/// by the `take_*`/`put_*` methods and emptied by `take_collected_output`.
/// It provides `skip_one` and `take_one`, which focus on working with ASCII
/// separators: they skip or take a single ASCII character, or all
/// consecutive non-ASCII chunks at once.
pub struct StringExpander<'a> {
// Cursor over the input being expanded.
parser: StringParser<'a>,
// Native units collected for the word currently being built.
output: Vec<NativeCharInt>,
}
impl<'a> StringExpander<'a> {
    /// Creates an expander positioned at the start of `input` with an empty
    /// output buffer.
    pub fn new(input: &'a NativeIntStr) -> Self {
        let parser = StringParser::new(input);
        Self {
            parser,
            output: Vec::default(),
        }
    }

    /// Creates an expander positioned at offset `pos` of `input` with an
    /// empty output buffer.
    pub fn new_at(input: &'a NativeIntStr, pos: usize) -> Self {
        let parser = StringParser::new_at(input, pos);
        Self {
            parser,
            output: Vec::default(),
        }
    }

    /// Shared access to the wrapped parser.
    pub fn get_parser(&self) -> &StringParser<'a> {
        &self.parser
    }

    /// Mutable access to the wrapped parser.
    pub fn get_parser_mut(&mut self) -> &mut StringParser<'a> {
        &mut self.parser
    }

    /// Returns the character at the current position without consuming it.
    pub fn peek(&self) -> Result<char, Error> {
        self.parser.peek()
    }

    /// Consumes one ASCII character, or a whole run of non-ASCII chunks,
    /// and discards it.
    pub fn skip_one(&mut self) -> Result<(), Error> {
        self.parser
            .consume_one_ascii_or_all_non_ascii()
            .map(|_discarded| ())
    }

    /// Current position of the wrapped parser.
    pub fn get_peek_position(&self) -> usize {
        self.parser.get_peek_position()
    }

    /// Consumes one ASCII character, or a whole run of non-ASCII chunks,
    /// and appends it unchanged to the output buffer.
    pub fn take_one(&mut self) -> Result<(), Error> {
        for chunk in self.parser.consume_one_ascii_or_all_non_ascii()? {
            match chunk {
                Chunk::ValidSingleIntChar((_, native)) => self.output.push(native),
                Chunk::InvalidEncoding(raw) => self.output.extend(raw),
            }
        }
        Ok(())
    }

    /// Appends a single character to the output buffer.
    pub fn put_one_char(&mut self, c: char) {
        self.put_string(OsString::from(String::from(c)));
    }

    /// Appends an OS string, converted to its native representation, to the
    /// output buffer.
    pub fn put_string<S: AsRef<OsStr>>(&mut self, os_str: S) {
        let converted = to_native_int_representation(os_str.as_ref());
        self.output.extend(converted.deref());
    }

    /// Appends a string that is already in native representation.
    pub fn put_native_string(&mut self, n_str: &NativeIntStr) {
        self.output.extend(n_str);
    }

    /// Returns everything collected so far and leaves the buffer empty.
    pub fn take_collected_output(&mut self) -> Vec<NativeCharInt> {
        mem::take(&mut self.output)
    }
}

182
src/uu/env/src/string_parser.rs vendored Normal file
View file

@ -0,0 +1,182 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
//
// spell-checker:ignore (words) splitted FFFD
#![forbid(unsafe_code)]
use std::{borrow::Cow, ffi::OsStr};
use crate::native_int_str::{
from_native_int_representation, get_char_from_native_int, get_single_native_int_value,
NativeCharInt, NativeIntStr,
};
/// Error reported by [`StringParser`].
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Error {
/// The parser's pointer at the time the error was created.
pub peek_position: usize,
/// What went wrong.
pub err_type: ErrorType,
}
/// Kinds of [`Error`] a [`StringParser`] can report.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ErrorType {
/// There is no input left at the requested position.
EndOfInput,
/// Not constructed anywhere in this module.
InternalError,
}
/// Provides a valid char or an invalid sequence of native units.
///
/// Invalid sequences can't be split in any meaningful way.
/// Thus, they need to be consumed as one piece.
pub enum Chunk<'a> {
/// A run of consecutive native units, none of which maps to a `char`.
InvalidEncoding(&'a NativeIntStr),
/// One native unit together with the `char` it represents.
ValidSingleIntChar((char, NativeCharInt)),
}
/// This struct makes parsing an OsString char by char more convenient.
///
/// It also allows capturing intermediate positions for later splitting.
pub struct StringParser<'a> {
// The complete input.
input: &'a NativeIntStr,
// Offset of the current position within `input`.
pointer: usize,
// The part of `input` starting at `pointer`; kept in sync by `set_pointer`.
remaining: &'a NativeIntStr,
}
impl<'a> StringParser<'a> {
    /// Creates a parser positioned at the start of `input`.
    pub fn new(input: &'a NativeIntStr) -> Self {
        Self {
            input,
            pointer: 0,
            remaining: input,
        }
    }

    /// Creates a parser positioned at offset `pos` of `input`.
    pub fn new_at(input: &'a NativeIntStr, pos: usize) -> Self {
        let mut parser = Self::new(input);
        parser.set_pointer(pos);
        parser
    }

    /// Returns the complete input this parser works on.
    pub fn get_input(&self) -> &'a NativeIntStr {
        self.input
    }

    /// Returns the offset of the current position.
    pub fn get_peek_position(&self) -> usize {
        self.pointer
    }

    /// Returns the character at the current position without consuming it.
    pub fn peek(&self) -> Result<char, Error> {
        self.peek_char_at_pointer(self.pointer)
    }

    /// Builds an [`Error`] of the given kind, tagged with the current position.
    fn make_err(&self, err_type: ErrorType) -> Error {
        let peek_position = self.get_peek_position();
        Error {
            peek_position,
            err_type,
        }
    }

    /// Returns the character stored at offset `at_pointer`.
    ///
    /// A unit that does not map to a `char` is reported as U+FFFD.
    /// Fails with `EndOfInput` when there is nothing at that offset.
    pub fn peek_char_at_pointer(&self, at_pointer: usize) -> Result<char, Error> {
        let (_, rest) = self.input.split_at(at_pointer);
        match rest.first() {
            None => Err(self.make_err(ErrorType::EndOfInput)),
            Some(&unit) => {
                Ok(get_char_from_native_int(unit).map_or('\u{FFFD}', |(c, _)| c))
            }
        }
    }

    /// Returns the chunk starting at `pointer` together with its length.
    ///
    /// A valid unit forms a chunk of length 1. An invalid unit starts a chunk
    /// that extends up to (not including) the next valid unit or the end.
    fn get_chunk_with_length_at(&self, pointer: usize) -> Result<(Chunk<'a>, usize), Error> {
        let (_, after) = self.input.split_at(pointer);
        let first = match after.first() {
            Some(&unit) => unit,
            None => return Err(self.make_err(ErrorType::EndOfInput)),
        };
        if let Some(valid) = get_char_from_native_int(first) {
            return Ok((Chunk::ValidSingleIntChar(valid), 1));
        }
        let len = after
            .iter()
            .skip(1)
            .position(|&unit| get_char_from_native_int(unit).is_some())
            .map_or(after.len(), |offset| offset + 1);
        Ok((Chunk::InvalidEncoding(&after[..len]), len))
    }

    /// Returns the chunk at the current position without consuming it, or
    /// `None` when it cannot be read.
    pub fn peek_chunk(&self) -> Option<Chunk<'a>> {
        match self.get_chunk_with_length_at(self.pointer) {
            Ok((chunk, _len)) => Some(chunk),
            Err(_) => None,
        }
    }

    /// Returns the chunk at the current position and advances past it.
    pub fn consume_chunk(&mut self) -> Result<Chunk<'a>, Error> {
        let (chunk, len) = self.get_chunk_with_length_at(self.pointer)?;
        self.set_pointer(self.pointer + len);
        Ok(chunk)
    }

    /// Consumes either exactly one ASCII character, or all consecutive
    /// chunks that are not ASCII characters, and returns what was consumed.
    pub fn consume_one_ascii_or_all_non_ascii(&mut self) -> Result<Vec<Chunk<'a>>, Error> {
        let mut consumed = Vec::<Chunk<'a>>::new();
        loop {
            let chunk = self.consume_chunk()?;
            let took_ascii = matches!(&chunk, Chunk::ValidSingleIntChar((c, _)) if c.is_ascii());
            consumed.push(chunk);
            if took_ascii {
                return Ok(consumed);
            }
            // Keep going only while the next chunk is also non-ASCII.
            let run_continues = match self.peek_chunk() {
                None => false,
                Some(Chunk::ValidSingleIntChar((c, _))) => !c.is_ascii(),
                Some(Chunk::InvalidEncoding(_)) => true,
            };
            if !run_continues {
                return Ok(consumed);
            }
        }
    }

    /// Advances the current position by `skip_byte_count` units.
    pub fn skip_multiple(&mut self, skip_byte_count: usize) {
        self.set_pointer(self.pointer + skip_byte_count);
    }

    /// Moves to the next occurrence of `c` in the remaining input, or to the
    /// end of the input if there is none. `c` itself is not consumed.
    pub fn skip_until_char_or_end(&mut self, c: char) {
        let needle = get_single_native_int_value(&c).unwrap();
        let target = match self.remaining.iter().position(|unit| *unit == needle) {
            Some(offset) => self.pointer + offset,
            None => self.input.len(),
        };
        self.set_pointer(target);
    }

    /// Returns the part of the input covered by `range`.
    pub fn substring(&self, range: &std::ops::Range<usize>) -> &'a NativeIntStr {
        let (_, tail) = self.input.split_at(range.start);
        let (middle, _) = tail.split_at(range.end - range.start);
        middle
    }

    /// Returns the not-yet-consumed input as an OS string.
    pub fn peek_remaining(&self) -> Cow<'a, OsStr> {
        from_native_int_representation(Cow::Borrowed(self.remaining))
    }

    /// Moves the current position to `new_pointer` and updates the cached
    /// remaining input to match.
    pub fn set_pointer(&mut self, new_pointer: usize) {
        self.pointer = new_pointer;
        self.remaining = self.input.split_at(new_pointer).1;
    }
}

158
src/uu/env/src/variable_parser.rs vendored Normal file
View file

@ -0,0 +1,158 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
use std::ops::Range;
use crate::{native_int_str::NativeIntStr, parse_error::ParseError, string_parser::StringParser};
/// Parses a variable reference (`$NAME`, `${NAME}` or `${NAME:default}`)
/// using a borrowed [`StringParser`], which is advanced past the reference.
pub struct VariableParser<'a, 'b> {
/// The parser to read from.
pub parser: &'b mut StringParser<'a>,
}
impl<'a, 'b> VariableParser<'a, 'b> {
    /// Returns the character at the current position, or `None` if peeking fails.
    fn get_current_char(&self) -> Option<char> {
        self.parser.peek().ok()
    }

    /// Rejects a variable name that starts with an ASCII digit.
    fn check_variable_name_start(&self) -> Result<(), ParseError> {
        match self.get_current_char() {
            Some(c) if c.is_ascii_digit() => Err(ParseError::ParsingOfVariableNameFailed {
                pos: self.parser.get_peek_position(),
                msg: format!(
                    "Unexpected character: '{}', expected variable name must not start with 0..9",
                    c
                ),
            }),
            _ => Ok(()),
        }
    }

    /// Consumes exactly one chunk of input.
    fn skip_one(&mut self) -> Result<(), ParseError> {
        let _chunk = self.parser.consume_chunk()?;
        Ok(())
    }

    /// Parses the inside of `${...}`; the parser must be positioned right
    /// after the opening brace.
    ///
    /// Returns the variable name and, if a `:` follows the name, everything
    /// between that colon and the closing brace as the default value.
    fn parse_braced_variable_name(
        &mut self,
    ) -> Result<(&'a NativeIntStr, Option<&'a NativeIntStr>), ParseError> {
        let pos_start = self.parser.get_peek_position();
        self.check_variable_name_start()?;

        // Scan the name; stop in front of the ':' or '}' that terminates it.
        let varname_end = loop {
            match self.get_current_char() {
                None => {
                    return Err(ParseError::ParsingOfVariableNameFailed {
                        pos: self.parser.get_peek_position(),
                        msg: "Missing closing brace".into(),
                    })
                }
                Some(c) if !c.is_ascii() || c.is_ascii_alphanumeric() || c == '_' => {
                    self.skip_one()?
                }
                Some(':') | Some('}') => break self.parser.get_peek_position(),
                Some(c) => {
                    return Err(ParseError::ParsingOfVariableNameFailed {
                        pos: self.parser.get_peek_position(),
                        msg: format!(
                            "Unexpected character: '{}', expected a closing brace ('}}') or colon (':')",
                            c
                        ),
                    })
                }
            }
        };

        // A ':' introduces a default value that runs up to the closing brace.
        let default_end = if self.get_current_char() == Some(':') {
            loop {
                match self.get_current_char() {
                    None => {
                        return Err(ParseError::ParsingOfVariableNameFailed {
                            pos: self.parser.get_peek_position(),
                            msg: "Missing closing brace after default value".into(),
                        })
                    }
                    Some('}') => break Some(self.parser.get_peek_position()),
                    Some(_) => self.skip_one()?,
                }
            }
        } else {
            None
        };

        // Consume the closing brace.
        self.skip_one()?;

        let default_opt = default_end.map(|end| {
            self.parser.substring(&Range {
                start: varname_end + 1,
                end,
            })
        });
        let varname = self.parser.substring(&Range {
            start: pos_start,
            end: varname_end,
        });
        Ok((varname, default_opt))
    }

    /// Parses a name made of ASCII alphanumerics and `_` that is not wrapped
    /// in braces. Fails if the name is empty or starts with a digit.
    fn parse_unbraced_variable_name(&mut self) -> Result<&'a NativeIntStr, ParseError> {
        let pos_start = self.parser.get_peek_position();
        self.check_variable_name_start()?;

        while matches!(self.get_current_char(), Some(c) if c.is_ascii_alphanumeric() || c == '_') {
            self.skip_one()?;
        }

        let pos_end = self.parser.get_peek_position();
        if pos_end == pos_start {
            return Err(ParseError::ParsingOfVariableNameFailed {
                pos: pos_start,
                msg: "Missing variable name".into(),
            });
        }
        Ok(self.parser.substring(&Range {
            start: pos_start,
            end: pos_end,
        }))
    }

    /// Parses a variable reference and returns `(name, optional default)`.
    ///
    /// The first chunk at the current position is skipped unconditionally
    /// (presumably the `$` the caller stopped at); `{` then selects the
    /// braced form, anything else the unbraced form.
    pub fn parse_variable(
        &mut self,
    ) -> Result<(&'a NativeIntStr, Option<&'a NativeIntStr>), ParseError> {
        self.skip_one()?;
        match self.get_current_char() {
            None => Err(ParseError::ParsingOfVariableNameFailed {
                pos: self.parser.get_peek_position(),
                msg: "missing variable name".into(),
            }),
            Some('{') => {
                self.skip_one()?;
                self.parse_braced_variable_name()
            }
            Some(_) => self
                .parse_unbraced_variable_name()
                .map(|name| (name, None)),
        }
    }
}

View file

@ -2,9 +2,12 @@
// //
// For the full copyright and license information, please view the LICENSE // For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code. // file that was distributed with this source code.
// spell-checker:ignore (words) bamf chdir rlimit prlimit COMSPEC // spell-checker:ignore (words) bamf chdir rlimit prlimit COMSPEC cout cerr FFFD
#[cfg(target_os = "linux")]
use crate::common::util::expected_result;
use crate::common::util::TestScenario; use crate::common::util::TestScenario;
use ::env::native_int_str::{Convert, NCvt};
use std::env; use std::env;
use std::path::Path; use std::path::Path;
use tempfile::tempdir; use tempfile::tempdir;
@ -34,11 +37,24 @@ fn test_env_version() {
#[test] #[test]
fn test_echo() { fn test_echo() {
let result = new_ucmd!().arg("echo").arg("FOO-bar").succeeds(); #[cfg(target_os = "windows")]
let args = ["cmd", "/d/c", "echo"];
#[cfg(not(target_os = "windows"))]
let args = ["echo"];
let result = new_ucmd!().args(&args).arg("FOO-bar").succeeds();
assert_eq!(result.stdout_str().trim(), "FOO-bar"); assert_eq!(result.stdout_str().trim(), "FOO-bar");
} }
/// Windows only: a batch file can be launched directly through `env`.
#[cfg(target_os = "windows")]
#[test]
fn test_if_windows_batch_files_can_be_executed() {
    let outcome = new_ucmd!().arg("./runBat.bat").succeeds();
    let stdout = outcome.stdout_str();
    assert!(stdout.contains("Hello Windows World!"));
}
#[test] #[test]
fn test_file_option() { fn test_file_option() {
let out = new_ucmd!() let out = new_ucmd!()
@ -245,3 +261,935 @@ fn test_fail_change_directory() {
.stderr_move_str(); .stderr_move_str();
assert!(out.contains("env: cannot change directory to ")); assert!(out.contains("env: cannot change directory to "));
} }
/// `-S` with an unquoted string splits it into separate arguments.
#[cfg(not(target_os = "windows"))] // windows has no executable "echo", its only supported as part of a batch-file
#[test]
fn test_split_string_into_args_one_argument_no_quotes() {
    let ts = TestScenario::new(util_name!());
    let stdout = ts
        .ucmd()
        .arg("-S echo hello world")
        .succeeds()
        .stdout_move_str();
    assert_eq!(stdout, "hello world\n");
}
/// `-S` keeps a double-quoted part together as a single argument.
#[cfg(not(target_os = "windows"))] // windows has no executable "echo", its only supported as part of a batch-file
#[test]
fn test_split_string_into_args_one_argument() {
    let ts = TestScenario::new(util_name!());
    let stdout = ts
        .ucmd()
        .arg("-S echo \"hello world\"")
        .succeeds()
        .stdout_move_str();
    assert_eq!(stdout, "hello world\n");
}
/// Escaped double quotes inside a double-quoted `-S` part are kept literally.
#[cfg(not(target_os = "windows"))] // windows has no executable "echo", its only supported as part of a batch-file
#[test]
fn test_split_string_into_args_s_escaping_challenge() {
    let ts = TestScenario::new(util_name!());
    let stdout = ts
        .ucmd()
        .args(&[r#"-S echo "hello \"great\" world""#])
        .succeeds()
        .stdout_move_str();
    assert_eq!(stdout, "hello \"great\" world\n");
}
/// `\c` inside double quotes is rejected with a dedicated error message.
#[test]
fn test_split_string_into_args_s_escaped_c_not_allowed() {
    let ts = TestScenario::new(util_name!());
    let stderr = ts.ucmd().args(&[r#"-S"\c""#]).fails().stderr_move_str();
    let expected = "env: '\\c' must not appear in double-quoted -S string\n";
    assert_eq!(stderr, expected);
}
/// With `-S`, every ASCII whitespace character separates arguments.
#[cfg(not(target_os = "windows"))] // no printf available
#[test]
fn test_split_string_into_args_s_whitespace_handling() {
    let ts = TestScenario::new(util_name!());
    let stdout = ts
        .ucmd()
        .args(&["-Sprintf x%sx\\n A \t B \x0B\x0C\r\n"])
        .succeeds()
        .stdout_move_str();
    assert_eq!(stdout, "xAx\nxBx\n");
}
/// Same whitespace handling when the long option `--split-string` is used.
#[cfg(not(target_os = "windows"))] // no printf available
#[test]
fn test_split_string_into_args_long_option_whitespace_handling() {
    let ts = TestScenario::new(util_name!());
    let stdout = ts
        .ucmd()
        .args(&["--split-string printf x%sx\\n A \t B \x0B\x0C\r\n"])
        .succeeds()
        .stdout_move_str();
    assert_eq!(stdout, "xAx\nxBx\n");
}
/// With `-vS`, the original and the split arguments are additionally
/// reported on stderr.
#[cfg(not(target_os = "windows"))] // no printf available
#[test]
fn test_split_string_into_args_debug_output_whitespace_handling() {
    let ts = TestScenario::new(util_name!());
    let expected_stderr = "input args:\narg[0]: 'env'\narg[1]: $'-vS printf x%sx\\\\n A \\t B \\x0B\\x0C\\r\\n'\nexecutable: 'printf'\narg[0]: $'x%sx\\n'\narg[1]: 'A'\narg[2]: 'B'\n";
    let outcome = ts
        .ucmd()
        .args(&["-vS printf x%sx\\n A \t B \x0B\x0C\r\n"])
        .succeeds();
    assert_eq!(outcome.stdout_str(), "xAx\nxBx\n");
    assert_eq!(outcome.stderr_str(), expected_stderr);
}
// FixMe: This test fails on macOS:
// thread 'test_env::test_gnu_e20' panicked at 'assertion failed: `(left == right)`
// left: `"A=B C=D\n__CF_USER_TEXT_ENCODING=0x1F5:0x0:0x0\n"`,
// right: `"A=B C=D\n"`', tests/by-util/test_env.rs:369:5
/// `\_` inside double quotes inserts a space into the value instead of
/// splitting the argument; the nested `env` then prints the one variable.
#[cfg(not(target_os = "macos"))]
#[test]
fn test_gnu_e20() {
    let ts = TestScenario::new(util_name!());
    let env_bin = format!("{} {}", crate::common::util::TESTS_BINARY, util_name!());
    let split_arg = format!(r#"-SA="B\_C=D" {}"#, env_bin.escape_default());
    let input = [String::from("-i"), split_arg];
    let outcome = ts.ucmd().args(&input).succeeds();
    assert_eq!(outcome.stdout_str(), "A=B C=D\n");
}
/// Escaped `$` in double quotes and plain `$` in single quotes stay literal;
/// `\'` inside single quotes yields a literal quote.
#[test]
fn test_split_string_misc() {
    use ::env::native_int_str::NCvt;
    use ::env::parse_args_from_str;

    // All of these inputs must split into the same argument list.
    let same_result_inputs = [
        r#"A=B FOO=AR sh -c "echo \$A\$FOO""#,
        r#"A=B FOO=AR sh -c 'echo $A$FOO'"#,
        r#"A=B FOO=AR sh -c 'echo $A$FOO'"#,
    ];
    for input in same_result_inputs.iter() {
        assert_eq!(
            NCvt::convert(vec!["A=B", "FOO=AR", "sh", "-c", "echo $A$FOO"]),
            parse_args_from_str(&NCvt::convert(*input)).unwrap()
        );
    }

    assert_eq!(
        NCvt::convert(vec!["-i", "A=B ' C"]),
        parse_args_from_str(&NCvt::convert(r#"-i A='B \' C'"#)).unwrap()
    );
}
/// `${FOO}` is expanded from the process environment, while `\$FOO` stays
/// literal.
#[test]
fn test_split_string_environment_vars_test() {
    std::env::set_var("FOO", "BAR");
    let parsed =
        ::env::parse_args_from_str(&NCvt::convert(r#"FOO=bar sh -c "echo x${FOO}x =\$FOO=""#))
            .unwrap();
    assert_eq!(
        NCvt::convert(vec!["FOO=bar", "sh", "-c", "echo xBARx =$FOO="]),
        parsed,
    );
}
/// Runs this implementation with `$args` on the scenario `$ts` and evaluates
/// to the command result.
///
/// On Linux the same arguments are also passed to `expected_result`
/// (presumably the GNU reference implementation — confirm in the test
/// utilities). If a reference result is available, exit code, stdout and
/// stderr must match it; a mismatch prints both sides before the assertions
/// fail. If no reference is available, the reason is printed and the
/// comparison is skipped.
///
/// `$args` is expanded twice, so it should be a side-effect-free expression.
#[macro_export]
macro_rules! compare_with_gnu {
( $ts:expr, $args:expr ) => {{
println!("==========================================================================");
let result = $ts.ucmd().args($args).run();
#[cfg(target_os = "linux")]
{
let reference = expected_result(&$ts, $args);
if let Ok(reference) = reference {
let success = result.code() == reference.code()
&& result.stdout_str() == reference.stdout_str()
&& result.stderr_str() == reference.stderr_str();
// Print both sides first so a failing assertion below is easy to diagnose.
if !success {
println!("reference.code: {}", reference.code());
println!(" result.code: {}", result.code());
println!("reference.cout: {}", reference.stdout_str());
println!(" result.cout: {}", result.stdout_str());
println!("reference.cerr: {}", reference.stderr_str_lossy());
println!(" result.cerr: {}", result.stderr_str_lossy());
}
assert_eq!(result.code(), reference.code());
assert_eq!(result.stdout_str(), reference.stdout_str());
assert_eq!(result.stderr_str(), reference.stderr_str());
} else {
println!(
"gnu reference test skipped. Reason: {:?}",
reference.unwrap_err()
);
}
}
result
}};
}
/// Invalid backslash sequences in `-S` fail with exit code 125 and name the
/// offending sequence on stderr.
#[test]
fn test_env_with_gnu_reference_parsing_errors() {
    let ts = TestScenario::new(util_name!());

    // (argument passed to env, expected stderr)
    let cases: [(&str, &str); 14] = [
        // no quotes, invalid escape sequence |
        ("-S\\|echo hallo", "env: invalid sequence '\\|' in -S\n"),
        // no quotes, invalid escape sequence a
        ("-S\\a", "env: invalid sequence '\\a' in -S\n"),
        // double quotes, invalid escape sequence a
        ("-S\"\\a\"", "env: invalid sequence '\\a' in -S\n"),
        // same as before, just using r#""#
        (r#"-S"\a""#, "env: invalid sequence '\\a' in -S\n"),
        // single quotes, invalid escape sequence a
        ("-S'\\a'", "env: invalid sequence '\\a' in -S\n"),
        // no quotes, invalid escape sequence |
        (r#"-S\|\&\;"#, "env: invalid sequence '\\|' in -S\n"),
        // no quotes, invalid escape sequence <
        (r#"-S\<\&\;"#, "env: invalid sequence '\\<' in -S\n"),
        // no quotes, invalid escape sequence >
        (r#"-S\>\&\;"#, "env: invalid sequence '\\>' in -S\n"),
        // no quotes, invalid escape sequence `
        (r#"-S\`\&\;"#, "env: invalid sequence '\\`' in -S\n"),
        // double quotes, invalid escape sequence `
        (r#"-S"\`\&\;""#, "env: invalid sequence '\\`' in -S\n"),
        // single quotes, invalid escape sequence `
        (r#"-S'\`\&\;'"#, "env: invalid sequence '\\`' in -S\n"),
        // ` escaped without quotes
        (r#"-S\`"#, "env: invalid sequence '\\`' in -S\n"),
        // ` escaped in double quotes
        (r#"-S"\`""#, "env: invalid sequence '\\`' in -S\n"),
        // ` escaped in single quotes
        (r#"-S'\`'"#, "env: invalid sequence '\\`' in -S\n"),
    ];
    for &(arg, expected_stderr) in cases.iter() {
        compare_with_gnu!(ts, &[arg])
            .failure()
            .code_is(125)
            .no_stdout()
            .stderr_is(expected_stderr);
    }

    // Backslash followed by a non-ASCII character; not compared with GNU.
    ts.ucmd()
        .args(&[r#"-S\🦉"#])
        .fails()
        .code_is(125)
        .no_stdout()
        .stderr_is("env: invalid sequence '\\\u{FFFD}' in -S\n"); // GNU doesn't show the owl either, but an invalid-unicode '?' instead
}
/// Empty single quotes form an empty program name, which cannot be found.
#[test]
fn test_env_with_gnu_reference_empty_executable_single_quotes() {
    let ts = TestScenario::new(util_name!());
    // Not compared with GNU: the GNU version adds escaping to this message.
    let outcome = ts.ucmd().args(&["-S''"]).fails();
    outcome
        .code_is(127)
        .no_stdout()
        .stderr_is("env: '': No such file or directory\n");
}
/// Empty double quotes form an empty program name, which cannot be found.
#[test]
fn test_env_with_gnu_reference_empty_executable_double_quotes() {
    let ts = TestScenario::new(util_name!());
    let outcome = compare_with_gnu!(ts, &["-S\"\""]);
    outcome
        .failure()
        .code_is(127)
        .no_stdout()
        .stderr_is("env: '': No such file or directory\n");
}
#[cfg(test)]
mod tests_split_iterator {
/// Quoting strategy chosen by [`escape_style`].
enum EscapeStyle {
    /// No escaping.
    None,
    /// Wrap in single quotes.
    SingleQuoted,
    /// Single quotes combined with backslash.
    Mixed,
}

/// Determines escaping style to use.
///
/// The empty string is always single-quoted. A string without any special
/// character needs no escaping. A string containing a newline but no single
/// quote is single-quoted; every other special string uses the mixed style.
fn escape_style(s: &str) -> EscapeStyle {
    if s.is_empty() {
        return EscapeStyle::SingleQuoted;
    }
    let mut special = false;
    let mut newline = false;
    let mut single_quote = false;
    for c in s.chars() {
        match c {
            '\n' => {
                newline = true;
                special = true;
            }
            '\'' => {
                single_quote = true;
                special = true;
            }
            // '~' is the tilde a shell actually expands. The look-alike '˜'
            // (U+02DC) that was listed here instead is kept so that strings
            // containing it are still quoted as before.
            '|' | '&' | ';' | '<' | '>' | '(' | ')' | '$' | '`' | '\\' | '"' | ' ' | '\t'
            | '*' | '?' | '[' | '#' | '~' | '˜' | '=' | '%' => {
                special = true;
            }
            _ => continue,
        }
    }
    if !special {
        EscapeStyle::None
    } else if newline && !single_quote {
        EscapeStyle::SingleQuoted
    } else {
        EscapeStyle::Mixed
    }
}
/// Quotes `s` so that a Unix shell reads it back as the same literal text.
///
/// Quoting is only added when `escape_style` asks for it, so plain words
/// are returned borrowed and unchanged. The exact quoting style is
/// otherwise unspecified.
pub fn quote(s: &str) -> std::borrow::Cow<str> {
    use std::borrow::Cow;

    match escape_style(s) {
        EscapeStyle::None => Cow::Borrowed(s),
        EscapeStyle::SingleQuoted => Cow::Owned(format!("'{}'", s)),
        // Every embedded single quote closes the quoting, adds an escaped
        // quote and reopens the quoting.
        EscapeStyle::Mixed => Cow::Owned(format!("'{}'", s.replace('\'', "'\\''"))),
    }
}
/// Builds one command line out of `words`, suitable for a Unix shell.
///
/// Every word is passed through [`quote`] so that it keeps its literal
/// meaning, and the quoted words are separated by single spaces. An empty
/// input produces an empty string.
///
/// Note: This function is essentially an inverse of [`split`].
///
/// # Examples
///
/// Logging an executed command in a form that can be pasted into a shell:
///
/// ```rust,no_run
/// fn execute(args: &[&str]) {
///     use std::process::Command;
///     println!("Executing: {}", shell_words::join(args));
///     Command::new(&args[0])
///         .args(&args[1..])
///         .spawn()
///         .expect("failed to start subprocess")
///         .wait()
///         .expect("failed to wait for subprocess");
/// }
///
/// execute(&["python", "-c", "print('Hello world!')"]);
/// ```
///
/// [`quote`]: fn.quote.html
/// [`split`]: fn.split.html
pub fn join<I, S>(words: I) -> String
where
    I: IntoIterator<Item = S>,
    S: AsRef<str>,
{
    let quoted_words: Vec<String> = words
        .into_iter()
        .map(|word| quote(word.as_ref()).into_owned())
        .collect();
    quoted_words.join(" ")
}
use std::ffi::OsString;
use ::env::parse_error::ParseError;
use env::native_int_str::{from_native_int_representation_owned, Convert, NCvt};
/// Test helper: splits `input` with the split iterator under test and
/// converts the resulting words to `OsString`s.
fn split(input: &str) -> Result<Vec<OsString>, ParseError> {
    let words = ::env::split_iterator::split(&NCvt::convert(input))?;
    Ok(words
        .into_iter()
        .map(from_native_int_representation_owned)
        .collect())
}
/// Test helper: asserts that every `(input, expected words)` pair splits
/// successfully into exactly the expected words. Failure messages carry the
/// index of the offending case.
fn split_ok(cases: &[(&str, &[&str])]) {
    for (i, &(input, expected)) in cases.iter().enumerate() {
        let actual = match split(input) {
            Ok(words) => words,
            Err(actual) => panic!(
                "[{i}] calling split({:?}):\nexpected: Ok({:?})\n actual: Err({:?})\n",
                input, expected, actual
            ),
        };
        assert!(
            expected == actual.as_slice(),
            "[{i}] After split({:?}).unwrap()\nexpected: {:?}\n actual: {:?}\n",
            input,
            expected,
            actual
        );
    }
}
/// An empty input yields no words.
#[test]
fn split_empty() {
split_ok(&[("", &[])]);
}
/// Leading spaces, tabs and newlines do not produce words.
#[test]
fn split_initial_whitespace_is_removed() {
split_ok(&[
(" a", &["a"]),
("\t\t\t\tbar", &["bar"]),
("\t \nc", &["c"]),
]);
}
/// Trailing spaces, tabs and newlines do not produce words.
#[test]
fn split_trailing_whitespace_is_removed() {
split_ok(&[
("a ", &["a"]),
("b\t", &["b"]),
("c\t \n \n \n", &["c"]),
("d\n\n", &["d"]),
]);
}
/// A carriage return separates words when unquoted and is kept literally
/// inside single quotes.
#[test]
fn split_carriage_return() {
split_ok(&[("c\ra\r'\r'\r", &["c", "a", "\r"])]);
}
/// Two backslash-escaped single quotes form one word consisting of two quotes.
#[test]
fn split_() {
split_ok(&[("\\'\\'", &["''"])]);
}
/// Single quotes: empty quotes give an empty word, `\\` collapses to one
/// backslash, and `#` does not start a comment.
#[test]
fn split_single_quotes() {
split_ok(&[
(r#"''"#, &[r#""#]),
(r#"'a'"#, &[r#"a"#]),
(r#"'\\'"#, &[r#"\"#]),
(r#"' \\ '"#, &[r#" \ "#]),
(r#"'#'"#, &[r#"#"#]),
]);
}
/// Double quotes: adjacent quoted parts join into one word, `\$`, `\"` and
/// `\\` yield the escaped character, a raw newline is kept, and
/// backslash-newline is removed.
#[test]
fn split_double_quotes() {
split_ok(&[
(r#""""#, &[""]),
(r#""""""#, &[""]),
(r#""a b c' d""#, &["a b c' d"]),
(r#""\$""#, &["$"]),
(r#""`""#, &["`"]),
(r#""\"""#, &["\""]),
(r#""\\""#, &["\\"]),
("\"\n\"", &["\n"]),
("\"\\\n\"", &[""]),
]);
}
/// Unquoted text: escaped backslashes, `$` and quotes become literal,
/// backslash-newline joins lines, and plain newlines or spaces separate words.
#[test]
fn split_unquoted() {
split_ok(&[
(r#"\\|\\&\\;"#, &[r#"\|\&\;"#]),
(r#"\\<\\>"#, &[r#"\<\>"#]),
(r#"\\(\\)"#, &[r#"\(\)"#]),
(r#"\$"#, &[r#"$"#]),
(r#"\""#, &[r#"""#]),
(r#"\'"#, &[r#"'"#]),
("\\\n", &[]),
(" \\\n \n", &[]),
("a\nb\nc", &["a", "b", "c"]),
("a\\\nb\\\nc", &["abc"]),
("foo bar baz", &["foo", "bar", "baz"]),
]);
}
/// A backslash at the very end of the input is an error. The error reports
/// the position after the backslash and whether it occurred between words
/// ("Delimiter") or inside an unquoted word ("Unquoted").
#[test]
fn split_trailing_backslash() {
assert_eq!(
split("\\"),
Err(ParseError::InvalidBackslashAtEndOfStringInMinusS {
pos: 1,
quoting: "Delimiter".into()
})
);
assert_eq!(
split(" \\"),
Err(ParseError::InvalidBackslashAtEndOfStringInMinusS {
pos: 2,
quoting: "Delimiter".into()
})
);
assert_eq!(
split("a\\"),
Err(ParseError::InvalidBackslashAtEndOfStringInMinusS {
pos: 2,
quoting: "Unquoted".into()
})
);
}
/// Unterminated quotes report `MissingClosingQuote` at the end of the input;
/// a `$` without a name reports `ParsingOfVariableNameFailed`.
// NOTE(review): the third and fourth assertions are identical — confirm
// whether a different input was intended for one of them.
#[test]
fn split_errors() {
assert_eq!(
split("'abc"),
Err(ParseError::MissingClosingQuote { pos: 4, c: '\'' })
);
assert_eq!(
split("\""),
Err(ParseError::MissingClosingQuote { pos: 1, c: '"' })
);
assert_eq!(
split("'\\"),
Err(ParseError::MissingClosingQuote { pos: 2, c: '\'' })
);
assert_eq!(
split("'\\"),
Err(ParseError::MissingClosingQuote { pos: 2, c: '\'' })
);
assert_eq!(
split(r#""$""#),
Err(ParseError::ParsingOfVariableNameFailed {
pos: 2,
msg: "Missing variable name".into()
}),
);
}
/// Unknown escape sequences are rejected in unquoted, double-quoted and
/// single-quoted context alike. For a non-ASCII character the error carries
/// U+FFFD instead of the character itself.
#[test]
fn split_error_fail_with_unknown_escape_sequences() {
assert_eq!(
split("\\a"),
Err(ParseError::InvalidSequenceBackslashXInMinusS { pos: 1, c: 'a' })
);
assert_eq!(
split("\"\\a\""),
Err(ParseError::InvalidSequenceBackslashXInMinusS { pos: 2, c: 'a' })
);
assert_eq!(
split("'\\a'"),
Err(ParseError::InvalidSequenceBackslashXInMinusS { pos: 2, c: 'a' })
);
assert_eq!(
split(r#""\a""#),
Err(ParseError::InvalidSequenceBackslashXInMinusS { pos: 2, c: 'a' })
);
assert_eq!(
split(r#"\🦉"#),
Err(ParseError::InvalidSequenceBackslashXInMinusS {
pos: 1,
c: '\u{FFFD}'
})
);
}
/// `#` starts a comment only between words; inside a word or inside quotes
/// it is an ordinary character. A comment ends at the next newline.
#[test]
fn split_comments() {
split_ok(&[
(r#" x # comment "#, &["x"]),
(r#" w1#w2 "#, &["w1#w2"]),
(r#"'not really a # comment'"#, &["not really a # comment"]),
(" a # very long comment \n b # another comment", &["a", "b"]),
]);
}
/// `quote` leaves plain words alone, single-quotes the empty string and
/// strings with newlines, and escapes embedded single quotes.
#[test]
fn test_quote() {
assert_eq!(quote(""), "''");
assert_eq!(quote("'"), "''\\'''");
assert_eq!(quote("abc"), "abc");
assert_eq!(quote("a \n b"), "'a \n b'");
assert_eq!(quote("X'\nY"), "'X'\\''\nY'");
}
/// `join` separates words with single spaces and quotes those that need it.
#[test]
fn test_join() {
assert_eq!(join(["a", "b", "c"]), "a b c");
assert_eq!(join([" ", "$", "\n"]), "' ' '$' '\n'");
}
/// Round trip: splitting the output of `join` gives back the original
/// argument list for each case below.
#[test]
fn join_followed_by_split_is_identity() {
let cases: Vec<&[&str]> = vec![
&["a"],
&["python", "-c", "print('Hello world!')"],
&["echo", " arg with spaces ", "arg \' with \" quotes"],
&["even newlines are quoted correctly\n", "\n", "\n\n\t "],
&["$", "`test`"],
&["cat", "~user/log*"],
&["test", "'a \"b", "\"X'"],
&["empty", "", "", ""],
];
for argv in cases {
let args = join(argv);
assert_eq!(split(&args).unwrap(), argv);
}
}
}
mod test_raw_string_parser {
use std::{
borrow::Cow,
ffi::{OsStr, OsString},
};
use env::{
native_int_str::{
from_native_int_representation, from_native_int_representation_owned,
to_native_int_representation, NativeStr,
},
string_expander::StringExpander,
string_parser,
};
const LEN_OWL: usize = if cfg!(target_os = "windows") { 2 } else { 4 };
/// Walks an ASCII-only input one char at a time and checks what happens once the
/// input is exhausted.
#[test]
fn test_ascii_only_take_one_look_at_correct_data_and_end_behavior() {
    let text = "hello";
    let native = to_native_int_representation(OsStr::new(text));
    let mut expander = StringExpander::new(&native);

    // Each peek must show the next input char; `take_one` then consumes it.
    text.chars().for_each(|expected| {
        assert_eq!(expected, expander.get_parser().peek().unwrap());
        expander.take_one().unwrap();
    });

    // Past the last char, peeking reports `EndOfInput` at the input length.
    assert_eq!(
        expander.get_parser().peek(),
        Err(string_parser::Error {
            peek_position: text.len(),
            err_type: string_parser::ErrorType::EndOfInput
        })
    );

    // Further takes keep failing and do not disturb the collected output.
    expander.take_one().unwrap_err();
    expander.take_one().unwrap_err();
    let collected = from_native_int_representation_owned(expander.take_collected_output());
    assert_eq!(collected, text);

    // Taking the output leaves an empty buffer behind.
    expander.take_one().unwrap_err();
    let collected = from_native_int_representation_owned(expander.take_collected_output());
    assert_eq!(collected, "");
}
/// Same walk as the ASCII test, but over an input mixing runs of multi-byte
/// chars (peeked as U+FFFD) with single ASCII `x` separators.
#[test]
fn test_multi_byte_codes_take_one_look_at_correct_data_and_end_behavior() {
    let text = OsString::from("🦉🦉🦉x🦉🦉x🦉x🦉🦉🦉🦉");
    let native = to_native_int_representation(text.as_os_str());
    let mut expander = StringExpander::new(&native);

    // Three times: one `take_one` for the owl run, one for the following `x`.
    for _ in 0..3 {
        assert_eq!(expander.get_parser().peek().unwrap(), '\u{FFFD}');
        expander.take_one().unwrap();
        assert_eq!(expander.get_parser().peek().unwrap(), 'x');
        expander.take_one().unwrap();
    }

    // The trailing owl run.
    assert_eq!(expander.get_parser().peek().unwrap(), '\u{FFFD}');
    expander.take_one().unwrap();

    // Ten owls plus three `x` have been consumed in total.
    assert_eq!(
        expander.get_parser().peek(),
        Err(string_parser::Error {
            peek_position: 10 * LEN_OWL + 3,
            err_type: string_parser::ErrorType::EndOfInput
        })
    );

    // Further takes keep failing and do not disturb the collected output.
    expander.take_one().unwrap_err();
    expander.take_one().unwrap_err();
    let collected = from_native_int_representation_owned(expander.take_collected_output());
    assert_eq!(collected, text);

    // Taking the output leaves an empty buffer behind.
    expander.take_one().unwrap_err();
    let collected = from_native_int_representation_owned(expander.take_collected_output());
    assert_eq!(collected, "");
}
/// Interleaves `put_one_char` with `take_one` at the start, in the middle and at
/// the end of the input, including one non-ASCII char.
#[test]
fn test_multi_byte_codes_put_one_ascii_start_middle_end_try_invalid_ascii() {
    let text = OsString::from("🦉🦉🦉x🦉🦉x🦉x🦉🦉🦉🦉");
    let native = to_native_int_representation(text.as_os_str());
    let mut expander = StringExpander::new(&native);

    // Insert before anything has been consumed.
    expander.put_one_char('a');

    // Insert after every owl run and after every `x`.
    for _ in 0..3 {
        assert_eq!(expander.get_parser().peek().unwrap(), '\u{FFFD}');
        expander.take_one().unwrap();
        expander.put_one_char('a');
        assert_eq!(expander.get_parser().peek().unwrap(), 'x');
        expander.take_one().unwrap();
        expander.put_one_char('a');
    }

    // Trailing owl run, followed by an inserted owl and an inserted `a`.
    assert_eq!(expander.get_parser().peek().unwrap(), '\u{FFFD}');
    expander.take_one().unwrap();
    expander.put_one_char('🦉');
    expander.put_one_char('a');

    assert_eq!(
        expander.get_parser().peek(),
        Err(string_parser::Error {
            peek_position: LEN_OWL * 10 + 3,
            err_type: string_parser::ErrorType::EndOfInput
        })
    );

    // Inserting still works after the input is exhausted.
    expander.take_one().unwrap_err();
    expander.put_one_char('a');
    expander.take_one().unwrap_err();
    let collected = from_native_int_representation_owned(expander.take_collected_output());
    assert_eq!(collected, "a🦉🦉🦉axa🦉🦉axa🦉axa🦉🦉🦉🦉🦉aa");

    // Taking the output leaves an empty buffer behind.
    expander.take_one().unwrap_err();
    let collected = from_native_int_representation_owned(expander.take_collected_output());
    assert_eq!(collected, "");
}
/// Mixes `skip_one`, `take_one` and `skip_until_char_or_end`, tracking the expected
/// peek position after every step; only taken chunks end up in the output.
#[test]
fn test_multi_byte_codes_skip_one_take_one_skip_until_ascii_char_or_end() {
    let text = OsString::from("🦉🦉🦉x🦉🦉x🦉x🦉🦉🦉🦉");
    let native = to_native_int_representation(text.as_os_str());
    let mut expander = StringExpander::new(&native);
    // Running expectation for the peek position, in native units.
    let mut pos: usize = 0;

    expander.skip_one().unwrap(); // skip 🦉🦉🦉
    pos += LEN_OWL * 3;
    assert_eq!(expander.get_peek_position(), pos);

    expander.skip_one().unwrap(); // skip x
    pos += 1;
    assert_eq!(expander.get_peek_position(), pos);

    expander.take_one().unwrap(); // take 🦉🦉
    pos += LEN_OWL * 2;
    assert_eq!(expander.get_peek_position(), pos);

    expander.skip_one().unwrap(); // skip x
    pos += 1;
    assert_eq!(expander.get_peek_position(), pos);

    expander.get_parser_mut().skip_until_char_or_end('x'); // skip 🦉
    pos += LEN_OWL;
    assert_eq!(expander.get_peek_position(), pos);

    expander.take_one().unwrap(); // take x
    expander.get_parser_mut().skip_until_char_or_end('x'); // skip 🦉🦉🦉🦉 till end
    pos += 1 + LEN_OWL * 4;
    assert_eq!(expander.get_peek_position(), pos);

    expander.take_one().unwrap_err();
    let collected = from_native_int_representation_owned(expander.take_collected_output());
    assert_eq!(collected, "🦉🦉x");
}
/// Uses `skip_multiple` with explicit unit counts to jump over owl runs, taking
/// only the `x` separators in between.
#[test]
fn test_multi_byte_codes_skip_multiple_ascii_bounded_good_and_bad() {
    let text = OsString::from("🦉🦉🦉x🦉🦉x🦉x🦉🦉🦉🦉");
    let native = to_native_int_representation(text.as_os_str());
    let mut expander = StringExpander::new(&native);
    // Running expectation for the peek position, in native units.
    let mut pos: usize = 0;

    // Skipping nothing must not move the position.
    expander.get_parser_mut().skip_multiple(0);
    assert_eq!(expander.get_peek_position(), pos);

    let leading_owls = LEN_OWL * 3;
    expander.get_parser_mut().skip_multiple(leading_owls); // skips 🦉🦉🦉
    pos += leading_owls;
    assert_eq!(expander.get_peek_position(), pos);

    expander.take_one().unwrap(); // take x
    pos += 1;
    assert_eq!(expander.get_peek_position(), pos);

    let middle = LEN_OWL * 3 + 1;
    expander.get_parser_mut().skip_multiple(middle); // skips 🦉🦉x🦉
    pos += middle;
    assert_eq!(expander.get_peek_position(), pos);

    expander.take_one().unwrap(); // take x
    pos += 1;
    assert_eq!(expander.get_peek_position(), pos);

    let trailing_owls = 4 * LEN_OWL;
    expander.get_parser_mut().skip_multiple(trailing_owls); // skips 🦉🦉🦉🦉
    pos += trailing_owls;
    assert_eq!(expander.get_peek_position(), pos);

    expander.take_one().unwrap_err();
    let collected = from_native_int_representation_owned(expander.take_collected_output());
    assert_eq!(collected, "xx");
}
/// Inserts UTF-8 strings with `put_string` before, between and after consumed input.
#[test]
fn test_multi_byte_codes_put_string_utf8_start_middle_end() {
    let text = OsString::from("🦉🦉🦉x🦉🦉x🦉x🦉🦉🦉🦉");
    let native = to_native_int_representation(text.as_os_str());
    let mut expander = StringExpander::new(&native);

    // Start: nothing consumed yet.
    expander.put_string("🦔oo");
    expander.take_one().unwrap(); // takes 🦉🦉🦉

    // Middle: between two taken chunks.
    expander.put_string("oo🦔");
    expander.take_one().unwrap(); // take x

    // End: the rest of the input is skipped, so nothing more can be taken.
    expander.get_parser_mut().skip_until_char_or_end('\n'); // skips till end
    expander.put_string("o🦔o");
    expander.take_one().unwrap_err();

    let collected = from_native_int_representation_owned(expander.take_collected_output());
    assert_eq!(collected, "🦔oo🦉🦉🦉oo🦔xo🦔o");
}
/// Checks `peek_remaining` before anything is consumed, after the first owl run,
/// and after skipping to the end.
#[test]
fn test_multi_byte_codes_look_at_remaining_start_middle_end() {
    let text = "🦉🦉🦉x🦉🦉x🦉x🦉🦉🦉🦉";
    let native = to_native_int_representation(OsStr::new(text));
    let mut expander = StringExpander::new(&native);

    // Start: the whole input is still ahead.
    assert_eq!(expander.get_parser().peek_remaining(), OsStr::new(text));

    // Middle: `text` is a `&str`, so the first three owls span 3 * 4 = 12 UTF-8 bytes.
    expander.take_one().unwrap(); // takes 🦉🦉🦉
    let first_run_len = 3 * '🦉'.len_utf8();
    assert_eq!(
        expander.get_parser().peek_remaining(),
        OsStr::new(&text[first_run_len..])
    );

    // End: nothing remains after skipping to the end.
    expander.get_parser_mut().skip_until_char_or_end('\n'); // skips till end
    assert_eq!(expander.get_parser().peek_remaining(), OsStr::new(""));

    expander.take_one().unwrap_err();
    let collected = from_native_int_representation_owned(expander.take_collected_output());
    assert_eq!(collected, "🦉🦉🦉");
}
/// Feeds the expander an input containing an incomplete owl encoding between `<` and `>`
/// and checks that the broken unit is peeked as U+FFFD yet taken over unchanged.
#[test]
fn test_deal_with_invalid_encoding() {
// Both are assigned in exactly one of the two cfg-gated blocks below.
let owl_invalid_part;
let (brace_1, brace_2);
#[cfg(target_os = "windows")]
{
// Windows: work on u16 units; use only the first UTF-16 unit of the owl.
let mut buffer = [0u16; 2];
let owl = '🦉'.encode_utf16(&mut buffer);
owl_invalid_part = owl[0];
brace_1 = '<'.encode_utf16(&mut buffer).to_vec();
brace_2 = '>'.encode_utf16(&mut buffer).to_vec();
}
#[cfg(not(target_os = "windows"))]
{
// Elsewhere: work on u8 units; use only the first UTF-8 byte of the owl.
let mut buffer = [0u8; 4];
let owl = '🦉'.encode_utf8(&mut buffer);
owl_invalid_part = owl.bytes().next().unwrap();
brace_1 = [b'<'].to_vec();
brace_2 = [b'>'].to_vec();
}
// Build the three-unit input: '<', the lone owl fragment, '>'.
let mut input_ux = brace_1;
input_ux.push(owl_invalid_part);
input_ux.extend(brace_2);
let input_str = from_native_int_representation(Cow::Borrowed(&input_ux));
let mut uut = StringExpander::new(&input_ux);
// Nothing consumed yet: the whole input is remaining.
assert_eq!(uut.get_parser().peek_remaining(), input_str);
assert_eq!(uut.get_parser().peek().unwrap(), '<');
uut.take_one().unwrap(); // takes "<"
// Remaining input is now the part after the first split point.
assert_eq!(
uut.get_parser().peek_remaining(),
NativeStr::new(&input_str).split_at(1).1
);
// The owl fragment is peeked as the replacement character.
assert_eq!(uut.get_parser().peek().unwrap(), '\u{FFFD}');
uut.take_one().unwrap(); // takes owl_b
assert_eq!(
uut.get_parser().peek_remaining(),
NativeStr::new(&input_str).split_at(2).1
);
assert_eq!(uut.get_parser().peek().unwrap(), '>');
// No '\n' in the input, so this skips the rest ('>') without collecting it.
uut.get_parser_mut().skip_until_char_or_end('\n');
assert_eq!(uut.get_parser().peek_remaining(), OsStr::new(""));
uut.take_one().unwrap_err();
// The collected output equals the first two input units ('<' and the fragment),
// i.e. the invalid unit was passed through rather than replaced.
assert_eq!(
from_native_int_representation_owned(uut.take_collected_output()),
NativeStr::new(&input_str).split_at(2).0
);
}
}

View file

@ -15,7 +15,6 @@ use pretty_assertions::assert_eq;
use rlimit::setrlimit; use rlimit::setrlimit;
#[cfg(feature = "sleep")] #[cfg(feature = "sleep")]
use rstest::rstest; use rstest::rstest;
#[cfg(unix)]
use std::borrow::Cow; use std::borrow::Cow;
use std::collections::VecDeque; use std::collections::VecDeque;
#[cfg(not(windows))] #[cfg(not(windows))]
@ -352,6 +351,11 @@ impl CmdResult {
std::str::from_utf8(&self.stderr).unwrap() std::str::from_utf8(&self.stderr).unwrap()
} }
/// Returns the program's standard error as a string slice, automatically handling invalid utf8
pub fn stderr_str_lossy(&self) -> Cow<'_, str> {
String::from_utf8_lossy(&self.stderr)
}
/// Returns the program's standard error as a string /// Returns the program's standard error as a string
/// consumes self /// consumes self
pub fn stderr_move_str(self) -> String { pub fn stderr_move_str(self) -> String {
@ -372,6 +376,14 @@ impl CmdResult {
#[track_caller] #[track_caller]
pub fn code_is(&self, expected_code: i32) -> &Self { pub fn code_is(&self, expected_code: i32) -> &Self {
let fails = self.code() != expected_code;
if fails {
eprintln!(
"stdout:\n{}\nstderr:\n{}",
self.stdout_str(),
self.stderr_str()
);
}
assert_eq!(self.code(), expected_code); assert_eq!(self.code(), expected_code);
self self
} }
@ -395,10 +407,8 @@ impl CmdResult {
pub fn success(&self) -> &Self { pub fn success(&self) -> &Self {
assert!( assert!(
self.succeeded(), self.succeeded(),
"Command was expected to succeed. Exit code: {}.\nstdout = {}\n stderr = {}", "Command was expected to succeed. code: {}\nstdout = {}\n stderr = {}",
self.exit_status() self.code(),
.code()
.map_or("n/a".to_string(), |code| code.to_string()),
self.stdout_str(), self.stdout_str(),
self.stderr_str() self.stderr_str()
); );
@ -2674,7 +2684,7 @@ pub fn expected_result(ts: &TestScenario, args: &[&str]) -> std::result::Result<
let (stdout, stderr): (String, String) = if cfg!(target_os = "linux") { let (stdout, stderr): (String, String) = if cfg!(target_os = "linux") {
( (
result.stdout_str().to_string(), result.stdout_str().to_string(),
result.stderr_str().to_string(), result.stderr_str_lossy().to_string(),
) )
} else { } else {
// `host_name_for` added prefix, strip 'g' prefix from results: // `host_name_for` added prefix, strip 'g' prefix from results:
@ -2682,7 +2692,7 @@ pub fn expected_result(ts: &TestScenario, args: &[&str]) -> std::result::Result<
let to = &from[1..]; let to = &from[1..];
( (
result.stdout_str().replace(&from, to), result.stdout_str().replace(&from, to),
result.stderr_str().replace(&from, to), result.stderr_str_lossy().replace(&from, to),
) )
}; };

1
tests/fixtures/env/runBat.bat vendored Normal file
View file

@ -0,0 +1 @@
echo Hello Windows World!

View file

@ -221,6 +221,8 @@ grep -rlE '/usr/local/bin/\s?/usr/local/bin' init.cfg tests/* | xargs -r sed -Ei
# we should not regress our project just to match what GNU is doing. # we should not regress our project just to match what GNU is doing.
# So, do some changes on the fly # So, do some changes on the fly
patch -N -r - -d "$path_GNU" -p 1 -i "`realpath \"$path_UUTILS/util/gnu-patches/tests_env_env-S.pl.patch\"`" || true
sed -i -e "s|rm: cannot remove 'e/slink'|rm: cannot remove 'e'|g" tests/rm/fail-eacces.sh sed -i -e "s|rm: cannot remove 'e/slink'|rm: cannot remove 'e'|g" tests/rm/fail-eacces.sh
sed -i -e "s|rm: cannot remove 'a/b'|rm: cannot remove 'a'|g" tests/rm/fail-2eperm.sh sed -i -e "s|rm: cannot remove 'a/b'|rm: cannot remove 'a'|g" tests/rm/fail-2eperm.sh

View file

@ -0,0 +1,47 @@
diff --git a/tests/env/env-S.pl b/tests/env/env-S.pl
index 710ca82cf..af7cf6efa 100755
--- a/tests/env/env-S.pl
+++ b/tests/env/env-S.pl
@@ -209,27 +209,28 @@ my @Tests =
{ERR=>"$prog: no terminating quote in -S string\n"}],
['err5', q[-S'A=B\\q'], {EXIT=>125},
{ERR=>"$prog: invalid sequence '\\q' in -S\n"}],
- ['err6', q[-S'A=$B'], {EXIT=>125},
- {ERR=>"$prog: only \${VARNAME} expansion is supported, error at: \$B\n"}],
+ ['err6', q[-S'A=$B echo hello'], {EXIT=>0},
+ {OUT=>"hello"}],
['err7', q[-S'A=${B'], {EXIT=>125},
- {ERR=>"$prog: only \${VARNAME} expansion is supported, " .
- "error at: \${B\n"}],
+ {ERR=>"$prog" . qq[: variable name issue (at 5): Missing closing brace\n]}],
['err8', q[-S'A=${B%B}'], {EXIT=>125},
- {ERR=>"$prog: only \${VARNAME} expansion is supported, " .
- "error at: \${B%B}\n"}],
+ {ERR=>"$prog" . qq[: variable name issue (at 5): Unexpected character: '%', expected a closing brace ('}') or colon (':')\n]}],
['err9', q[-S'A=${9B}'], {EXIT=>125},
- {ERR=>"$prog: only \${VARNAME} expansion is supported, " .
- "error at: \${9B}\n"}],
+ {ERR=>"$prog" . qq[: variable name issue (at 4): Unexpected character: '9', expected variable name must not start with 0..9\n]}],
# Test incorrect shebang usage (extraneous whitespace).
['err_sp2', q['-v -S cat -n'], {EXIT=>125},
- {ERR=>"env: invalid option -- ' '\n" .
- "env: use -[v]S to pass options in shebang lines\n" .
- "Try 'env --help' for more information.\n"}],
+ {ERR=>"$prog: error: unexpected argument '- ' found\n\n" .
+ " tip: to pass '- ' as a value, use '-- - '\n\n" .
+ "Usage: $prog [OPTION]... [-] [NAME=VALUE]... [COMMAND [ARG]...]\n\n" .
+ "For more information, try '--help'.\n" .
+ "$prog: use -[v]S to pass options in shebang lines\n"}],
['err_sp3', q['-v -S cat -n'], {EXIT=>125}, # embedded tab after -v
- {ERR=>"env: invalid option -- '\t'\n" .
- "env: use -[v]S to pass options in shebang lines\n" .
- "Try 'env --help' for more information.\n"}],
+ {ERR=>"$prog: error: unexpected argument '-\t' found\n\n" .
+ " tip: to pass '-\t' as a value, use '-- -\t'\n\n" .
+ "Usage: $prog [OPTION]... [-] [NAME=VALUE]... [COMMAND [ARG]...]\n\n" .
+ "For more information, try '--help'.\n" .
+ "$prog: use -[v]S to pass options in shebang lines\n"}],
# Also diagnose incorrect shebang usage when failing to exec.
# This typically happens with: