1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-08-01 21:47:46 +00:00

Merge pull request #5586 from tertsdiepraam/expr-ast-refactor

`expr`: refactor AST and parsing
This commit is contained in:
Sylvestre Ledru 2023-11-28 07:51:11 +01:00 committed by GitHub
commit 0e8197e2f0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 535 additions and 705 deletions

View file

@ -3,14 +3,19 @@
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
use std::fmt::Display;
use clap::{crate_version, Arg, ArgAction, Command};
use syntax_tree::AstNode;
use uucore::{
error::{UResult, USimpleError, UUsageError},
display::Quotable,
error::{UError, UResult},
format_usage, help_about, help_section, help_usage,
};
use crate::syntax_tree::is_truthy;
mod syntax_tree;
mod tokens;
mod options {
pub const VERSION: &str = "version";
@ -18,6 +23,51 @@ mod options {
pub const EXPRESSION: &str = "expression";
}
pub type ExprResult<T> = Result<T, ExprError>;
/// Error half of [`ExprResult`].
///
/// The user-facing text of every variant is produced by the `Display`
/// implementation for this type; the notes on each variant quote that text.
#[derive(Debug, PartialEq, Eq)]
pub enum ExprError {
/// Displayed as `syntax error: unexpected argument <arg>`, where the payload
/// is printed in quoted form.
UnexpectedArgument(String),
/// Displayed as `syntax error: missing argument after <arg>`, where the
/// payload is printed in quoted form.
MissingArgument(String),
/// Displayed as `non-integer argument`.
NonIntegerArgument,
/// Displayed as `missing operand`. This is the only variant for which
/// `UError::usage` returns `true`.
MissingOperand,
/// Displayed as `division by zero`.
DivisionByZero,
/// Displayed as `Invalid regex expression`.
InvalidRegexExpression,
/// Displayed as `expected ')' after <arg>`, where the payload is printed in
/// quoted form.
ExpectedClosingBraceAfter(String),
}
impl Display for ExprError {
    /// Writes the user-facing message for this error.
    ///
    /// Variants that carry a string payload embed it in quoted form (via
    /// `Quotable::quote`); all other variants map to a fixed message.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Payload-carrying variants need real formatting, so they return
        // early; the remaining variants only select a constant string.
        let fixed_message = match self {
            Self::UnexpectedArgument(arg) => {
                return write!(f, "syntax error: unexpected argument {}", arg.quote());
            }
            Self::MissingArgument(arg) => {
                return write!(f, "syntax error: missing argument after {}", arg.quote());
            }
            Self::ExpectedClosingBraceAfter(arg) => {
                return write!(f, "expected ')' after {}", arg.quote());
            }
            Self::NonIntegerArgument => "non-integer argument",
            Self::MissingOperand => "missing operand",
            Self::DivisionByZero => "division by zero",
            Self::InvalidRegexExpression => "Invalid regex expression",
        };
        f.write_str(fixed_message)
    }
}
// Empty impl: none of the `std::error::Error` methods are overridden.
impl std::error::Error for ExprError {}
impl UError for ExprError {
    /// Every `ExprError` reports the same code, `2`.
    fn code(&self) -> i32 {
        2
    }

    /// Returns `true` only for [`ExprError::MissingOperand`].
    // NOTE(review): presumably this makes uucore append a usage hint to the
    // message; confirm against the `UError` trait documentation.
    fn usage(&self) -> bool {
        matches!(self, Self::MissingOperand)
    }
}
pub fn uu_app() -> Command {
Command::new(uucore::util_name())
.version(crate_version!())
@ -53,36 +103,15 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> {
// For expr utility we do not want getopts.
// The following usage should work without escaping hyphens: `expr -15 = 1 + 2 \* \( 3 - -4 \)`
let matches = uu_app().try_get_matches_from(args)?;
let token_strings = matches
let token_strings: Vec<&str> = matches
.get_many::<String>(options::EXPRESSION)
.map(|v| v.into_iter().map(|s| s.as_ref()).collect::<Vec<_>>())
.unwrap_or_default();
if token_strings.is_empty() {
return Err(UUsageError::new(2, "missing operand"));
}
match process_expr(&token_strings[..]) {
Ok(expr_result) => print_expr_ok(&expr_result),
Err(expr_error) => Err(USimpleError::new(2, &expr_error)),
let res = AstNode::parse(&token_strings)?.eval()?;
println!("{res}");
if !is_truthy(&res) {
return Err(1.into());
}
}
/// Runs the whole `expr` pipeline on the raw argument strings.
///
/// The strings are tokenized, the tokens are turned into an AST, and the AST
/// is evaluated. Each stage receives the previous stage's `Result` as-is, so
/// an error from any stage ends up in the returned `Err`.
fn process_expr(token_strings: &[&str]) -> Result<String, String> {
    evaluate_ast(syntax_tree::tokens_to_ast(tokens::strings_to_tokens(
        token_strings,
    )))
}
/// Prints the evaluated expression and derives the exit status from it.
///
/// The result is always written to stdout followed by a newline. The function
/// then returns `Err(1.into())` when the result is the empty string or parses
/// as the `i32` value `0`, and `Ok(())` otherwise.
fn print_expr_ok(expr_result: &str) -> UResult<()> {
    println!("{expr_result}");

    let is_falsy = expr_result.is_empty() || matches!(expr_result.parse::<i32>(), Ok(0));
    if is_falsy {
        return Err(1.into());
    }
    Ok(())
}
fn evaluate_ast(maybe_ast: Result<Box<syntax_tree::AstNode>, String>) -> Result<String, String> {
maybe_ast.and_then(|ast| ast.evaluate())
Ok(())
}

File diff suppressed because it is too large Load diff

View file

@ -1,147 +0,0 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
//!
//! The following tokens are present in the expr grammar:
//! * integer literal;
//! * string literal;
//! * infix binary operators;
//! * prefix operators.
//!
//! According to the man page of expr, the expression is already split into tokens (each token is a separate CLI argument).
//! Hence all we need to do is map the strings into `Token` structures, except for some ugly fiddling with `+`-escaping.
//!
// spell-checker:ignore (ToDO) paren
/// One lexical element of an `expr` expression.
#[derive(Debug, Clone)]
pub enum Token {
    /// An operand, stored as the original argument text.
    Value {
        value: String,
    },
    /// The `(` argument.
    ParOpen,
    /// The `)` argument.
    ParClose,
    /// A binary operator written between its operands.
    InfixOp {
        // NOTE(review): presumably a larger number binds tighter; confirm
        // against the consumer in `syntax_tree`.
        precedence: u8,
        left_assoc: bool,
        /// The operator's text, e.g. `+`.
        value: String,
    },
    /// An operator written before its operands (e.g. `length`).
    PrefixOp {
        /// Number of operands the operator takes.
        arity: usize,
        /// The operator's text.
        value: String,
    },
}

impl Token {
    /// Builds an [`Token::InfixOp`] whose text is a copy of `v`.
    fn new_infix_op(v: &str, left_assoc: bool, precedence: u8) -> Self {
        let value = v.to_owned();
        Self::InfixOp {
            precedence,
            left_assoc,
            value,
        }
    }

    /// Builds a [`Token::Value`] whose text is a copy of `v`.
    fn new_value(v: &str) -> Self {
        let value = v.to_owned();
        Self::Value { value }
    }

    /// Returns `true` only for an infix operator whose text is exactly `+`.
    fn is_infix_plus(&self) -> bool {
        matches!(self, Self::InfixOp { value, .. } if value == "+")
    }

    /// Returns `true` when this token is a [`Token::Value`].
    fn is_a_value(&self) -> bool {
        matches!(self, Self::Value { .. })
    }

    /// Returns `true` when this token is [`Token::ParClose`].
    fn is_a_close_paren(&self) -> bool {
        matches!(self, Self::ParClose)
    }
}
/// Maps each argument string to a [`Token`], paired with its 1-based position.
///
/// Operators and parentheses are recognized by exact text; anything else
/// becomes a value token. Each candidate goes through
/// `push_token_if_not_escaped`, which may instead replace a preceding `+`
/// with a value token, so the output can be shorter than the input.
///
/// The token list is handed to `maybe_dump_tokens_acc` before being returned.
/// As written, this function always returns `Ok`.
pub fn strings_to_tokens(strings: &[&str]) -> Result<Vec<(usize, Token)>, String> {
    // Shared constructor for the keyword operators that precede their operands.
    let prefix_op = |text: &str, arity: usize| Token::PrefixOp {
        arity,
        value: text.to_string(),
    };

    let mut collected = Vec::with_capacity(strings.len());

    for (position, &text) in strings.iter().enumerate() {
        let candidate = match text {
            "(" => Token::ParOpen,
            ")" => Token::ParClose,

            "^" => Token::new_infix_op(text, false, 7),
            ":" => Token::new_infix_op(text, true, 6),
            "*" | "/" | "%" => Token::new_infix_op(text, true, 5),
            "+" | "-" => Token::new_infix_op(text, true, 4),
            "=" | "!=" | "<" | ">" | "<=" | ">=" => Token::new_infix_op(text, true, 3),
            "&" => Token::new_infix_op(text, true, 2),
            "|" => Token::new_infix_op(text, true, 1),

            "match" | "index" => prefix_op(text, 2),
            "substr" => prefix_op(text, 3),
            "length" => prefix_op(text, 1),

            _ => Token::new_value(text),
        };

        // Token positions are reported 1-based.
        push_token_if_not_escaped(&mut collected, position + 1, candidate, text);
    }

    maybe_dump_tokens_acc(&collected);

    Ok(collected)
}
/// Debug aid: prints the token list to stdout when the environment variable
/// `EXPR_DEBUG_TOKENS` is set to exactly `1`.
///
/// Output is a header line followed by one tab-indented `Debug` rendering per
/// `(index, token)` pair. Nothing is printed otherwise.
fn maybe_dump_tokens_acc(tokens_acc: &[(usize, Token)]) {
    let dump_requested = matches!(std::env::var("EXPR_DEBUG_TOKENS").as_deref(), Ok("1"));
    if !dump_requested {
        return;
    }

    println!("EXPR_DEBUG_TOKENS");
    tokens_acc
        .iter()
        .for_each(|entry| println!("\t{entry:?}"));
}
/// Appends a token to `acc`, treating a preceding `+` as an escape when it
/// cannot be a binary plus.
///
/// `+` may be used as an escape, as in `expr + 1` and `expr 1 + + 1`. The
/// previous token counts as an escape when it is an infix `+` and either it is
/// the only token so far, or the token before it is neither a value nor a
/// closing parenthesis. In that case the `+` is removed and the raw string `s`
/// is pushed as a value token; otherwise `token` is pushed unchanged.
fn push_token_if_not_escaped(acc: &mut Vec<(usize, Token)>, tok_idx: usize, token: Token, s: &str) {
    let previous_plus_is_escape = match acc.as_slice() {
        // At least two tokens, the last being `+`: it is a real binary plus
        // only if something that can act as a left operand precedes it.
        [.., before_plus, (_, plus)] if plus.is_infix_plus() => {
            !before_plus.1.is_a_value() && !before_plus.1.is_a_close_paren()
        }
        // A lone leading `+` has no left operand, so it must be an escape.
        [(_, only)] => only.is_infix_plus(),
        _ => false,
    };

    let entry = if previous_plus_is_escape {
        // Drop the escaping `+`; the current argument becomes a literal.
        acc.pop();
        (tok_idx, Token::new_value(s))
    } else {
        (tok_idx, token)
    };
    acc.push(entry);
}