1
Fork 0
mirror of https://github.com/RGBCube/uutils-coreutils synced 2025-07-29 12:07:46 +00:00

Merge pull request #708 from RGafiyatullin/impl-expr-pre-merge-2

Implement expr.
This commit is contained in:
Heather 2015-10-10 11:29:02 +04:00
commit 57839e4703
6 changed files with 698 additions and 1 deletion

View file

@ -21,6 +21,7 @@ all = [
"echo",
"env",
"expand",
"expr",
"factor",
"false",
"fmt",
@ -96,6 +97,7 @@ du = { optional=true, path="src/du" }
echo = { optional=true, path="src/echo" }
env = { optional=true, path="src/env" }
expand = { optional=true, path="src/expand" }
expr = { optional=true, path="src/expr" }
factor = { optional=true, path="src/factor" }
false = { optional=true, path="src/false" }
fmt = { optional=true, path="src/fmt" }

View file

@ -53,6 +53,7 @@ PROGS := \
echo \
env \
expand \
expr \
factor \
false \
fmt \
@ -144,7 +145,7 @@ INSTALLEES := \
# Shared library extension
SYSTEM := $(shell uname)
DYLIB_EXT :=
DYLIB_EXT :=
ifeq ($(SYSTEM),Linux)
DYLIB_EXT := so
DYLIB_FLAGS := -shared

13
src/expr/Cargo.toml Normal file
View file

@ -0,0 +1,13 @@
[package]
name = "expr"
version = "0.0.1"
authors = []
[lib]
name = "expr"
path = "expr.rs"
[dependencies]
getopts = "*"
libc = "*"

135
src/expr/expr.rs Normal file
View file

@ -0,0 +1,135 @@
#![crate_name = "expr"]
/*
* This file is part of the uutils coreutils package.
*
* (c) Roman Gafiyatullin <r.gafiyatullin@me.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
extern crate getopts;
extern crate libc;
#[path="../common/util.rs"]
#[macro_use]
mod util;
mod tokens;
mod syntax_tree;
use std::io::{Write};
static NAME: &'static str = "expr";
static VERSION: &'static str = "0.0.1";
pub fn uumain(args: Vec<String>) -> i32 {
// For expr utility we do not want getopts.
// The following usage should work without escaping hyphens: `expr -15 = 1 + 2 \* \( 3 - -4 \)`
if maybe_handle_help_or_version( &args ) { 0 }
else {
let token_strings = args[1..].to_vec();
match process_expr( &token_strings ) {
Ok( expr_result ) => print_expr_ok( &expr_result ),
Err( expr_error ) => print_expr_error( &expr_error )
}
}
}
/// Run the full pipeline over the expression tokens: lex -> parse -> evaluate.
///
/// Each stage consumes the previous stage's Result wholesale, so errors
/// flow straight through to the caller.
fn process_expr( token_strings: &Vec<String> ) -> Result< String, String > {
    let lexed = tokens::strings_to_tokens( token_strings );
    let parsed = syntax_tree::tokens_to_ast( lexed );
    evaluate_ast( parsed )
}
/// Print the evaluated result and derive the exit code from it:
/// 1 when the result is null or "0", 0 otherwise (expr's convention).
fn print_expr_ok( expr_result: &String ) -> i32 {
    println!("{}", expr_result);
    match expr_result.as_ref() {
        "" | "0" => 1,
        _ => 0,
    }
}
/// Report an evaluation or parse error and terminate the process.
///
/// Exit code 2 matches the "syntactically invalid" status documented in
/// the help text. `crash!` comes from common/util.rs and never returns,
/// hence the `!` return type.
fn print_expr_error( expr_error: &String ) -> ! {
    crash!(2, "{}", expr_error)
}
/// Evaluate a parse result: propagate a parse error unchanged, otherwise
/// evaluate the AST from its root node.
///
/// Rewritten from the `is_err()` + double-`unwrap()` chain to `and_then`,
/// which expresses the same short-circuiting without unwraps.
fn evaluate_ast( maybe_ast: Result<Box<syntax_tree::ASTNode>, String> ) -> Result<String, String> {
    maybe_ast.and_then( |ast| ast.evaluate() )
}
/// Handle `expr --help` / `expr --version`; returns true when handled.
///
/// Only a single argument can be an option: `--help` with further
/// arguments must be treated as an ordinary expression operand.
fn maybe_handle_help_or_version( args: &Vec<String> ) -> bool {
    if args.len() != 2 {
        return false;
    }
    match args[1].as_ref() {
        "--help" => { print_help(); true }
        "--version" => { print_version(); true }
        _ => false,
    }
}
/// Print the usage text for `expr --help`.
fn print_help() {
    //! The following is taken from GNU coreutils' "expr --help" output.
    // NOTE(review): the entire help text is one raw string literal; it is
    // user-visible output and must not be edited or re-indented.
    print!(
r#"Usage: expr EXPRESSION
or: expr OPTION
--help display this help and exit
--version output version information and exit
Print the value of EXPRESSION to standard output. A blank line below
separates increasing precedence groups. EXPRESSION may be:
ARG1 | ARG2 ARG1 if it is neither null nor 0, otherwise ARG2
ARG1 & ARG2 ARG1 if neither argument is null or 0, otherwise 0
ARG1 < ARG2 ARG1 is less than ARG2
ARG1 <= ARG2 ARG1 is less than or equal to ARG2
ARG1 = ARG2 ARG1 is equal to ARG2
ARG1 != ARG2 ARG1 is unequal to ARG2
ARG1 >= ARG2 ARG1 is greater than or equal to ARG2
ARG1 > ARG2 ARG1 is greater than ARG2
ARG1 + ARG2 arithmetic sum of ARG1 and ARG2
ARG1 - ARG2 arithmetic difference of ARG1 and ARG2
ARG1 * ARG2 arithmetic product of ARG1 and ARG2
ARG1 / ARG2 arithmetic quotient of ARG1 divided by ARG2
ARG1 % ARG2 arithmetic remainder of ARG1 divided by ARG2
STRING : REGEXP [NOT IMPLEMENTED] anchored pattern match of REGEXP in STRING
match STRING REGEXP [NOT IMPLEMENTED] same as STRING : REGEXP
substr STRING POS LENGTH substring of STRING, POS counted from 1
index STRING CHARS index in STRING where any CHARS is found, or 0
length STRING length of STRING
+ TOKEN interpret TOKEN as a string, even if it is a
keyword like 'match' or an operator like '/'
( EXPRESSION ) value of EXPRESSION
Beware that many operators need to be escaped or quoted for shells.
Comparisons are arithmetic if both ARGs are numbers, else lexicographical.
Pattern matches return the string matched between \( and \) or null; if
\( and \) are not used, they return the number of characters matched or 0.
Exit status is 0 if EXPRESSION is neither null nor 0, 1 if EXPRESSION is null
or 0, 2 if EXPRESSION is syntactically invalid, and 3 if an error occurred.
Environment variables:
* EXPR_DEBUG_TOKENS=1 dump expression's tokens
* EXPR_DEBUG_RPN=1 dump expression represented in reverse polish notation
* EXPR_DEBUG_SYA_STEP=1 dump each parser step
* EXPR_DEBUG_AST=1 dump expression represented abstract syntax tree
"#
    );
}
/// Print "<name> <version>" for `expr --version`.
fn print_version() {
    println!("{} {}", NAME, VERSION);
}

386
src/expr/syntax_tree.rs Normal file
View file

@ -0,0 +1,386 @@
/*
* This file is part of the uutils coreutils package.
*
* (c) Roman Gafiyatullin <r.gafiyatullin@me.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
//!
//! Here we employ shunting-yard algorithm for building AST from tokens according to operators' precedence and associativeness.
//! * https://en.wikipedia.org/wiki/Shunting-yard_algorithm
//!
use tokens::{Token};
type TokenStack = Vec<(usize, Token)>;
type OperandsList = Vec< Box<ASTNode> >;
/// A node of the expression's abstract syntax tree.
#[derive(Debug)]
pub enum ASTNode {
    // A literal operand; `token_idx` is the token's 1-based position in the
    // argument list, kept for error reporting and debug dumps.
    Leaf { token_idx: usize, value: String },
    // An operator application: `op_type` is the operator's literal spelling
    // (e.g. "+", "length"), `operands` its sub-expressions.
    Node { token_idx: usize, op_type: String, operands: OperandsList }
}
impl ASTNode {
fn debug_dump( &self ) {
self.debug_dump_impl( 1 );
}
fn debug_dump_impl( &self, depth: usize ) {
for _ in 0..depth {
print!("\t", );
}
match self {
&ASTNode::Leaf{ ref token_idx, ref value } => println!("Leaf( {} ) at #{} ( evaluate -> {:?} )", value, token_idx, self.evaluate()),
&ASTNode::Node{ ref token_idx, ref op_type, ref operands } => {
println!("Node( {} ) at #{} (evaluate -> {:?})", op_type, token_idx, self.evaluate());
for operand in operands {
operand.debug_dump_impl( depth + 1 );
}
}
}
}
fn new_node( token_idx: usize, op_type: &String, operands: OperandsList ) -> Box<ASTNode> {
Box::new( ASTNode::Node{
token_idx: token_idx,
op_type: op_type.clone(),
operands: operands
} )
}
fn new_leaf( token_idx: usize, value: &String ) -> Box<ASTNode> {
Box::new( ASTNode::Leaf{ token_idx: token_idx, value: value.clone() } )
}
pub fn evaluate( &self ) -> Result<String, String> {
match self {
&ASTNode::Leaf{ ref value, .. } => Ok( value.clone() ),
&ASTNode::Node{ ref op_type, .. } =>
match self.operand_values() {
Err( reason ) => Err( reason ),
Ok( operand_values ) =>
match op_type.as_ref() {
"+" => infix_operator_two_ints( |a: i64, b: i64| Ok( a + b ), &operand_values ),
"-" => infix_operator_two_ints( |a: i64, b: i64| Ok( a - b ), &operand_values ),
"*" => infix_operator_two_ints( |a: i64, b: i64| Ok( a * b ), &operand_values ),
"/" => infix_operator_two_ints(
|a: i64, b: i64|
if b == 0 { Err("division by zero".to_string()) }
else { Ok( a / b ) },
&operand_values ),
"%" => infix_operator_two_ints(
|a: i64, b: i64|
if b == 0 { Err("division by zero".to_string()) }
else { Ok( a % b ) },
&operand_values ),
"=" => infix_operator_two_ints_or_two_strings(
|a: i64, b: i64| Ok( bool_as_int(a == b) ),
|a: &String, b: &String| Ok( bool_as_string(a == b) ),
&operand_values
),
"!=" => infix_operator_two_ints_or_two_strings(
|a: i64, b: i64| Ok( bool_as_int(a != b) ),
|a: &String, b: &String| Ok( bool_as_string(a != b) ),
&operand_values
),
"<" => infix_operator_two_ints_or_two_strings(
|a: i64, b: i64| Ok( bool_as_int(a < b) ),
|a: &String, b: &String| Ok( bool_as_string(a < b) ),
&operand_values
),
">" => infix_operator_two_ints_or_two_strings(
|a: i64, b: i64| Ok( bool_as_int(a > b) ),
|a: &String, b: &String| Ok( bool_as_string(a > b) ),
&operand_values
),
"<=" => infix_operator_two_ints_or_two_strings(
|a: i64, b: i64| Ok( bool_as_int(a <= b) ),
|a: &String, b: &String| Ok( bool_as_string(a <= b) ),
&operand_values
),
">=" => infix_operator_two_ints_or_two_strings(
|a: i64, b: i64| Ok( bool_as_int(a >= b) ),
|a: &String, b: &String| Ok( bool_as_string(a >= b) ),
&operand_values
),
"length" => prefix_operator_length( &operand_values ),
"index" => prefix_operator_index( &operand_values ),
"substr" => prefix_operator_substr( &operand_values ),
_ => Err(format!("operation not implemented: {}", op_type))
}
}
}
}
pub fn operand_values( &self ) -> Result<Vec<String>, String> {
if let &ASTNode::Node{ ref operands, .. } = self {
let mut out = Vec::with_capacity( operands.len() );
for operand in operands {
match operand.evaluate() {
Ok( value ) => out.push( value ),
Err( reason ) => return Err( reason ),
}
}
Ok( out )
}
else { panic!("Invoked .operand_values(&self) not with ASTNode::Node") }
}
}
pub fn tokens_to_ast( maybe_tokens: Result< Vec<(usize, Token)>, String > ) -> Result<Box<ASTNode>, String> {
if maybe_tokens.is_err() { Err( maybe_tokens.err().unwrap() ) }
else {
let tokens = maybe_tokens.ok().unwrap();
let mut out_stack: TokenStack = Vec::new();
let mut op_stack: TokenStack = Vec::new();
for (token_idx, token) in tokens {
if let Err( reason ) = push_token_to_either_stack( token_idx, &token, &mut out_stack, &mut op_stack ) {
return Err( reason )
}
}
if let Err( reason ) = move_rest_of_ops_to_out( &mut out_stack, &mut op_stack ) {
return Err( reason )
}
assert!( op_stack.is_empty() );
maybe_dump_rpn( &out_stack );
let result = ast_from_rpn( &mut out_stack );
if !out_stack.is_empty() {
Err( "syntax error (fist RPN token does not represent expression AST's root)".to_string() )
}
else {
maybe_dump_ast( &result );
result
}
}
}
/// Dump the finished AST (or the parse error) when EXPR_DEBUG_AST=1.
fn maybe_dump_ast( result: &Result< Box<ASTNode>, String > ) {
    use std::env;
    // Dump only when the variable is set to exactly "1".
    let enabled = env::var( "EXPR_DEBUG_AST" ).map( |v| v == "1" ).unwrap_or( false );
    if !enabled { return; }
    println!("EXPR_DEBUG_AST");
    match result {
        &Ok( ref ast ) => ast.debug_dump(),
        &Err( ref reason ) => println!("\terr: {:?}", reason),
    }
}
/// Dump the reverse-polish token stack when EXPR_DEBUG_RPN=1.
fn maybe_dump_rpn( rpn: &TokenStack ) {
    use std::env;
    let enabled = env::var( "EXPR_DEBUG_RPN" ).map( |v| v == "1" ).unwrap_or( false );
    if !enabled { return; }
    println!("EXPR_DEBUG_RPN");
    for token in rpn {
        println!("\t{:?}", token);
    }
}
/// Pop the next token off the RPN stack and build the subtree rooted there.
///
/// Values become leaves; infix operators always take two operands; prefix
/// operators take `arity` operands (recursing for each). Parentheses never
/// appear in the RPN output, so hitting one here is a broken parser
/// invariant, not a user error — hence the panic.
fn ast_from_rpn( rpn: &mut TokenStack ) -> Result<Box<ASTNode>, String> {
    match rpn.pop() {
        // Stack exhausted while an operator still wants operands.
        None => Err( "syntax error (premature end of expression)".to_string() ),
        Some( (token_idx, Token::Value{ value }) ) =>
            Ok( ASTNode::new_leaf( token_idx, &value ) ),
        Some( (token_idx, Token::InfixOp{ value, .. }) ) =>
            maybe_ast_node( token_idx, &value, 2, rpn ),
        Some( (token_idx, Token::PrefixOp{ value, arity }) ) =>
            maybe_ast_node( token_idx, &value, arity, rpn ),
        Some( (token_idx, unexpected_token) ) =>
            panic!("unexpected token at #{} {:?}", token_idx, unexpected_token),
    }
}
fn maybe_ast_node( token_idx: usize, op_type: &String, arity: usize, rpn: &mut TokenStack ) -> Result< Box<ASTNode>, String > {
let mut operands = Vec::with_capacity( arity );
for _ in 0..arity {
match ast_from_rpn( rpn ) {
Err( reason ) => return Err( reason ),
Ok( operand ) => operands.push( operand ),
}
}
operands.reverse();
Ok( ASTNode::new_node( token_idx, op_type, operands ) )
}
/// Drain the operator stack into the output after all input tokens are
/// consumed; any parenthesis still stacked at this point was never matched.
fn move_rest_of_ops_to_out( out_stack: &mut TokenStack, op_stack: &mut TokenStack ) -> Result<(), String> {
    while let Some( entry ) = op_stack.pop() {
        match entry {
            (token_idx, Token::ParOpen) =>
                return Err( format!( "syntax error (Mismatched open-parenthesis at #{})", token_idx ) ),
            (token_idx, Token::ParClose) =>
                return Err( format!( "syntax error (Mismatched close-parenthesis at #{})", token_idx ) ),
            other => out_stack.push( other ),
        }
    }
    Ok( () )
}
/// Shunting-yard dispatch for a single token.
///
/// Values go straight to the output stack; prefix operators and open
/// parentheses always go onto the operator stack; an infix operator may
/// first displace stacked operators of higher priority (push_op_to_stack);
/// a close parenthesis unwinds the operator stack to its matching open.
fn push_token_to_either_stack( token_idx: usize, token: &Token, out_stack: &mut TokenStack, op_stack: &mut TokenStack ) -> Result<(), String> {
    let result =
        match token {
            &Token::Value{ .. } => Ok( out_stack.push( (token_idx, token.clone()) ) ),
            &Token::InfixOp{ .. } =>
                // An empty operator stack can't outrank anything; push directly.
                if op_stack.is_empty() { Ok( op_stack.push( (token_idx, token.clone()) ) ) }
                else { push_op_to_stack( token_idx, token, out_stack, op_stack ) },
            &Token::PrefixOp{ .. } => Ok( op_stack.push( (token_idx, token.clone()) ) ),
            &Token::ParOpen => Ok( op_stack.push( (token_idx, token.clone()) ) ),
            &Token::ParClose => move_till_match_paren( out_stack, op_stack )
        };
    // Optional per-step tracing via EXPR_DEBUG_SYA_STEP=1.
    maybe_dump_shunting_yard_step( token_idx, token, out_stack, op_stack, &result );
    result
}
/// Trace one shunting-yard step (token, both stacks, step result) when
/// EXPR_DEBUG_SYA_STEP=1.
fn maybe_dump_shunting_yard_step( token_idx: usize, token: &Token, out_stack: &TokenStack, op_stack: &TokenStack, result: &Result<(), String> ) {
    use std::env;
    let enabled = env::var( "EXPR_DEBUG_SYA_STEP" ).map( |v| v == "1" ).unwrap_or( false );
    if !enabled { return; }
    println!("EXPR_DEBUG_SYA_STEP");
    println!("\t{} => {:?}", token_idx, token);
    println!("\t\tout: {:?}", out_stack);
    println!("\t\top : {:?}", op_stack);
    println!("\t\tresult: {:?}", result);
}
/// Push an infix operator, first moving to the output any stacked
/// operators that must be applied before it.
///
/// Standard shunting-yard rule: a left-associative operator yields to
/// stacked operators of greater-or-equal precedence; a right-associative
/// one yields only to strictly greater precedence. An open parenthesis on
/// top of the stack is a barrier; a prefix operator never blocks the push.
fn push_op_to_stack( token_idx: usize, token: &Token, out_stack: &mut TokenStack, op_stack: &mut TokenStack ) -> Result<(), String> {
    if let &Token::InfixOp{ precedence: prec, left_assoc: la, .. } = token {
        loop {
            match op_stack.last() {
                // Empty stack: nothing can outrank the new operator.
                None =>
                    return Ok( op_stack.push( (token_idx, token.clone()) ) ),
                Some( &(_, Token::ParOpen) ) =>
                    return Ok( op_stack.push( (token_idx, token.clone()) ) ),
                Some( &(_, Token::InfixOp{ precedence: prev_prec, .. }) ) =>
                    // Higher-priority stacked operator: emit it first, loop again.
                    if la && prev_prec >= prec
                    || !la && prev_prec > prec {
                        out_stack.push( op_stack.pop().unwrap() )
                    }
                    else {
                        return Ok( op_stack.push( (token_idx, token.clone()) ) )
                    },
                Some( &(_, Token::PrefixOp{ .. }) ) =>
                    return Ok( op_stack.push( (token_idx, token.clone()) ) ),
                // Values never land on op_stack; broken invariant, not user error.
                Some( _ ) => panic!("Non-operator on op_stack")
            }
        }
    }
    else {
        panic!("Expected infix-op")
    }
}
/// Pop operators into the output until the matching '(' is found and
/// discarded; error if the stack runs out first.
fn move_till_match_paren( out_stack: &mut TokenStack, op_stack: &mut TokenStack ) -> Result<(), String> {
    while let Some( entry ) = op_stack.pop() {
        if let (_, Token::ParOpen) = entry {
            return Ok( () );
        }
        out_stack.push( entry );
    }
    // Exhausted the operator stack without seeing '('.
    Err( "syntax error (Mismatched close-parenthesis)".to_string() )
}
/// Apply `f` to two operands that must both parse as i64.
///
/// Returns the result formatted as a string, `f`'s own error (e.g.
/// "division by zero"), or a generic message when either operand is not
/// an integer.
///
/// Parses both operands up front via a tuple match instead of the nested
/// if-lets, and maps the success case instead of re-matching.
fn infix_operator_two_ints<F>( f: F, values: &Vec<String> ) -> Result<String, String>
    where F : Fn( i64, i64 ) -> Result<i64, String>
{
    assert!( values.len() == 2 );
    match ( values[0].parse::<i64>(), values[1].parse::<i64>() ) {
        ( Ok( left ), Ok( right ) ) => f( left, right ).map( |result| result.to_string() ),
        _ => Err( "Expected an integer operand".to_string() ),
    }
}
/// Apply the numeric variant `fi` when *both* operands parse as i64,
/// otherwise fall back to the string variant `fs` (expr's comparison rule:
/// arithmetic if both ARGs are numbers, else lexicographical).
fn infix_operator_two_ints_or_two_strings<FI, FS>( fi: FI, fs: FS, values: &Vec<String> ) -> Result<String, String>
    where FI : Fn( i64, i64 ) -> Result<i64, String>,
          FS : Fn( &String, &String ) -> Result<String, String>
{
    assert!( values.len() == 2 );
    let left = values[0].parse::<i64>();
    let right = values[1].parse::<i64>();
    match ( left, right ) {
        ( Ok( a_int ), Ok( b_int ) ) =>
            match fi( a_int, b_int ) {
                Ok( result ) => Ok( result.to_string() ),
                Err( reason ) => Err( reason ),
            },
        _ => fs( &values[0], &values[1] ),
    }
}
/// `length STRING` — the number of characters in STRING.
///
/// Fixed to count Unicode scalar values via chars().count() instead of the
/// byte length (String::len); the sibling operators `index` and `substr`
/// already iterate chars(), so character counting keeps the three
/// consistent (e.g. `length héllo` is 5, not 6).
fn prefix_operator_length( values: &Vec<String> ) -> Result<String, String> {
    assert!( values.len() == 1 );
    Ok( values[0].chars().count().to_string() )
}
/// `index STRING CHARS` — the 1-based position of the first character of
/// STRING that occurs anywhere in CHARS, or "0" when none does.
///
/// enumerate() replaces the hand-maintained counter and any() the inner
/// scanning loop; positions stay 1-based per the expr specification.
fn prefix_operator_index( values: &Vec<String> ) -> Result<String, String> {
    assert!( values.len() == 2 );
    let haystack = &values[0];
    let needles = &values[1];
    for ( pos, ch_h ) in haystack.chars().enumerate() {
        if needles.chars().any( |ch_n| ch_n == ch_h ) {
            return Ok( ( pos + 1 ).to_string() )
        }
    }
    Ok( "0".to_string() )
}
/// `substr STRING POS LENGTH` — the LENGTH-character substring of STRING
/// starting at 1-based POS. Non-positive or out-of-range POS/LENGTH yield
/// the empty string; non-integer POS/LENGTH yield an error.
fn prefix_operator_substr( values: &Vec<String> ) -> Result<String, String> {
    assert!( values.len() == 3 );
    let subj = &values[0];
    let idx = match values[1].parse::<i64>() {
        Ok( i ) => i,
        Err( _ ) => return Err( "expected integer as POS arg to 'substr'".to_string() ),
    };
    let len = match values[2].parse::<i64>() {
        Ok( i ) => i,
        Err( _ ) => return Err( "expected integer as LENGTH arg to 'substr'".to_string() ),
    };
    if idx <= 0 || len <= 0 { return Ok( "".to_string() ) }
    // Equivalent to the original counting loop: skip POS-1 characters,
    // then take at most LENGTH characters.
    Ok( subj.chars().skip( ( idx - 1 ) as usize ).take( len as usize ).collect() )
}
/// Map a comparison result to expr's numeric convention: true -> 1, false -> 0.
fn bool_as_int( b: bool ) -> i64 { b as i64 }
/// Map a comparison result to expr's string convention: "1" or "0".
fn bool_as_string( b: bool ) -> String { ( if b { "1" } else { "0" } ).to_string() }

160
src/expr/tokens.rs Normal file
View file

@ -0,0 +1,160 @@
/*
* This file is part of the uutils coreutils package.
*
* (c) Roman Gafiyatullin <r.gafiyatullin@me.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/
//!
//! The following tokens are present in the expr grammar:
//! * integer literal;
//! * string literal;
//! * infix binary operators;
//! * prefix operators.
//!
//! According to the man-page of expr we have expression split into tokens (each token -- separate CLI-argument).
//! Hence all we need is to map the strings into the Token structures, except for some ugly fiddling with +-escaping.
//!
/// A lexical token of the expr grammar; each CLI argument maps to exactly
/// one token.
///
/// The two stacked #[derive] attributes are merged into the idiomatic
/// single derive list.
#[derive(Debug, Clone)]
pub enum Token {
    // A literal operand (string or integer, kept uninterpreted).
    Value{ value: String },
    ParOpen,
    ParClose,
    // A binary infix operator; precedence and associativity drive the
    // shunting-yard parser.
    InfixOp {
        precedence: u8,
        left_assoc: bool,
        value: String
    },
    // A prefix (function-style) operator taking `arity` arguments.
    PrefixOp {
        arity: usize,
        value: String
    },
}
impl Token {
    /// Build an infix operator token with the given associativity/precedence.
    fn new_infix_op( v: &String, left_assoc: bool, precedence: u8 ) -> Self {
        Token::InfixOp{
            left_assoc: left_assoc,
            precedence: precedence,
            value: v.clone()
        }
    }
    /// Build a literal-value token.
    fn new_value( v: &String ) -> Self {
        Token::Value{
            value: v.clone()
        }
    }
    /// True for the infix "+" operator (used by the +-escaping heuristic).
    fn is_infix_plus( &self ) -> bool {
        match self {
            &Token::InfixOp{ ref value, .. } => value == "+",
            _ => false
        }
    }
    /// True when this is a Value token whose text parses as an i64.
    fn is_a_number( &self ) -> bool {
        match self {
            // parse().is_ok() replaces the verbose Ok/Err match.
            &Token::Value{ ref value, .. } => value.parse::<i64>().is_ok(),
            _ => false,
        }
    }
    /// True for the ")" token.
    fn is_a_close_paren( &self ) -> bool {
        match self {
            &Token::ParClose => true,
            _ => false,
        }
    }
}
/// Map each CLI argument to a (1-based index, Token) pair.
///
/// The mapping is purely per-argument except for the "+"-escaping
/// heuristic applied by push_token_if_not_escaped. Always returns Ok; the
/// Result return type keeps the lexer's signature uniform with the later
/// pipeline stages.
///
/// enumerate() replaces the original hand-maintained index counter.
pub fn strings_to_tokens( strings: &Vec<String> ) -> Result< Vec<(usize, Token)>, String > {
    let mut tokens_acc = Vec::with_capacity( strings.len() );
    for ( i, s ) in strings.iter().enumerate() {
        // Token indices stay 1-based for error reporting.
        let tok_idx = i + 1;
        let token_if_not_escaped =
            match s.as_ref() {
                "(" => Token::ParOpen,
                ")" => Token::ParClose,
                // Higher precedence binds tighter; only "^" is right-associative.
                "^" => Token::new_infix_op( &s, false, 7 ),
                ":" => Token::new_infix_op( &s, true, 6 ),
                "*" => Token::new_infix_op( &s, true, 5 ),
                "/" => Token::new_infix_op( &s, true, 5 ),
                "%" => Token::new_infix_op( &s, true, 5 ),
                "+" => Token::new_infix_op( &s, true, 4 ),
                "-" => Token::new_infix_op( &s, true, 4 ),
                "=" => Token::new_infix_op( &s, true, 3 ),
                "!=" => Token::new_infix_op( &s, true, 3 ),
                "<" => Token::new_infix_op( &s, true, 3 ),
                ">" => Token::new_infix_op( &s, true, 3 ),
                "<=" => Token::new_infix_op( &s, true, 3 ),
                ">=" => Token::new_infix_op( &s, true, 3 ),
                "&" => Token::new_infix_op( &s, true, 2 ),
                "|" => Token::new_infix_op( &s, true, 1 ),
                "match" => Token::PrefixOp{ arity: 2, value: s.clone() },
                "substr" => Token::PrefixOp{ arity: 3, value: s.clone() },
                "index" => Token::PrefixOp{ arity: 2, value: s.clone() },
                "length" => Token::PrefixOp{ arity: 1, value: s.clone() },
                _ => Token::new_value( &s ),
            };
        push_token_if_not_escaped( &mut tokens_acc, tok_idx, token_if_not_escaped, &s );
    }
    maybe_dump_tokens_acc( &tokens_acc );
    Ok( tokens_acc )
}
/// Dump the lexer output when EXPR_DEBUG_TOKENS=1.
fn maybe_dump_tokens_acc( tokens_acc: &Vec<(usize, Token)> ) {
    use std::env;
    let enabled = env::var( "EXPR_DEBUG_TOKENS" ).map( |v| v == "1" ).unwrap_or( false );
    if !enabled { return; }
    println!("EXPR_DEBUG_TOKENS");
    for token in tokens_acc {
        println!("\t{:?}", token);
    }
}
/// Append `token` to `acc`, applying the "+"-escaping heuristic.
///
/// Per the help text, `+ TOKEN` forces TOKEN to be read as a plain string
/// even if it is a keyword or operator. Heuristic: when the previous token
/// is an infix "+" that cannot be arithmetic addition — because the token
/// before it is neither a number nor a close-parenthesis, or does not
/// exist — that "+" was an escape marker: drop it and push the current
/// argument `s` as a literal value instead of `token`.
fn push_token_if_not_escaped( acc: &mut Vec<(usize, Token)>, tok_idx: usize, token: Token, s: &String ) {
    // Smells heuristics... :(
    let prev_is_plus =
        match acc.last() {
            None => false,
            Some( ref t ) => t.1.is_infix_plus(),
        };
    let should_use_as_escaped =
        if prev_is_plus && acc.len() >= 2 {
            // A "+" that follows a value or ')' is a genuine addition, so
            // the current token is NOT escaped.
            let pre_prev = &acc[acc.len() - 2];
            ! ( pre_prev.1.is_a_number() || pre_prev.1.is_a_close_paren() )
        }
        else if prev_is_plus { true }
        else { false };
    if should_use_as_escaped {
        // Replace the escape "+" with the literal value of `s`.
        acc.pop();
        acc.push( (tok_idx, Token::new_value( s )) )
    }
    else {
        acc.push( (tok_idx, token) )
    }
}