/* * This file is part of the uutils coreutils package. * * (c) Roman Gafiyatullin * * For the full copyright and license information, please view the LICENSE * file that was distributed with this source code. */ //! //! Here we employ shunting-yard algorithm for building AST from tokens according to operators' precedence and associativeness. //! * https://en.wikipedia.org/wiki/Shunting-yard_algorithm //! use onig::{Regex, RegexOptions, Syntax}; use tokens::Token; type TokenStack = Vec<(usize, Token)>; pub type OperandsList = Vec>; #[derive(Debug)] pub enum ASTNode { Leaf { token_idx: usize, value: String, }, Node { token_idx: usize, op_type: String, operands: OperandsList, }, } impl ASTNode { fn debug_dump(&self) { self.debug_dump_impl(1); } fn debug_dump_impl(&self, depth: usize) { for _ in 0..depth { print!("\t",); } match *self { ASTNode::Leaf { ref token_idx, ref value, } => println!( "Leaf( {} ) at #{} ( evaluate -> {:?} )", value, token_idx, self.evaluate() ), ASTNode::Node { ref token_idx, ref op_type, ref operands, } => { println!( "Node( {} ) at #{} (evaluate -> {:?})", op_type, token_idx, self.evaluate() ); for operand in operands { operand.debug_dump_impl(depth + 1); } } } } fn new_node(token_idx: usize, op_type: &str, operands: OperandsList) -> Box { Box::new(ASTNode::Node { token_idx, op_type: op_type.into(), operands, }) } fn new_leaf(token_idx: usize, value: &str) -> Box { Box::new(ASTNode::Leaf { token_idx, value: value.into(), }) } pub fn evaluate(&self) -> Result { match *self { ASTNode::Leaf { ref value, .. } => Ok(value.clone()), ASTNode::Node { ref op_type, .. } => match self.operand_values() { Err(reason) => Err(reason), Ok(operand_values) => match op_type.as_ref() { "+" => infix_operator_two_ints( |a: i64, b: i64| checked_binop(|| a.checked_add(b), "+"), &operand_values, ), "-" => infix_operator_two_ints( |a: i64, b: i64| checked_binop(|| a.checked_sub(b), "-"), &operand_values, ), "*" => infix_operator_two_ints( |a: i64, b: i64| checked_binop(|| a.checked_mul(b), "*"), &operand_values, ), "/" => infix_operator_two_ints( |a: i64, b: i64| { if b == 0 { Err("division by zero".to_owned()) } else { checked_binop(|| a.checked_div(b), "/") } }, &operand_values, ), "%" => infix_operator_two_ints( |a: i64, b: i64| { if b == 0 { Err("division by zero".to_owned()) } else { Ok(a % b) } }, &operand_values, ), "=" => infix_operator_two_ints_or_two_strings( |a: i64, b: i64| Ok(bool_as_int(a == b)), |a: &String, b: &String| Ok(bool_as_string(a == b)), &operand_values, ), "!=" => infix_operator_two_ints_or_two_strings( |a: i64, b: i64| Ok(bool_as_int(a != b)), |a: &String, b: &String| Ok(bool_as_string(a != b)), &operand_values, ), "<" => infix_operator_two_ints_or_two_strings( |a: i64, b: i64| Ok(bool_as_int(a < b)), |a: &String, b: &String| Ok(bool_as_string(a < b)), &operand_values, ), ">" => infix_operator_two_ints_or_two_strings( |a: i64, b: i64| Ok(bool_as_int(a > b)), |a: &String, b: &String| Ok(bool_as_string(a > b)), &operand_values, ), "<=" => infix_operator_two_ints_or_two_strings( |a: i64, b: i64| Ok(bool_as_int(a <= b)), |a: &String, b: &String| Ok(bool_as_string(a <= b)), &operand_values, ), ">=" => infix_operator_two_ints_or_two_strings( |a: i64, b: i64| Ok(bool_as_int(a >= b)), |a: &String, b: &String| Ok(bool_as_string(a >= b)), &operand_values, ), "|" => infix_operator_or(&operand_values), "&" => infix_operator_and(&operand_values), ":" | "match" => operator_match(&operand_values), "length" => prefix_operator_length(&operand_values), "index" => prefix_operator_index(&operand_values), "substr" => prefix_operator_substr(&operand_values), _ => Err(format!("operation not implemented: {}", op_type)), }, }, } } pub fn operand_values(&self) -> Result, String> { if let ASTNode::Node { ref operands, .. } = *self { let mut out = Vec::with_capacity(operands.len()); for operand in operands { match operand.evaluate() { Ok(value) => out.push(value), Err(reason) => return Err(reason), } } Ok(out) } else { panic!("Invoked .operand_values(&self) not with ASTNode::Node") } } } pub fn tokens_to_ast( maybe_tokens: Result, String>, ) -> Result, String> { if maybe_tokens.is_err() { Err(maybe_tokens.err().unwrap()) } else { let tokens = maybe_tokens.ok().unwrap(); let mut out_stack: TokenStack = Vec::new(); let mut op_stack: TokenStack = Vec::new(); for (token_idx, token) in tokens { if let Err(reason) = push_token_to_either_stack(token_idx, &token, &mut out_stack, &mut op_stack) { return Err(reason); } } if let Err(reason) = move_rest_of_ops_to_out(&mut out_stack, &mut op_stack) { return Err(reason); } assert!(op_stack.is_empty()); maybe_dump_rpn(&out_stack); let result = ast_from_rpn(&mut out_stack); if !out_stack.is_empty() { Err("syntax error (fist RPN token does not represent expression AST's root)".to_owned()) } else { maybe_dump_ast(&result); result } } } fn maybe_dump_ast(result: &Result, String>) { use std::env; if let Ok(debug_var) = env::var("EXPR_DEBUG_AST") { if debug_var == "1" { println!("EXPR_DEBUG_AST"); match *result { Ok(ref ast) => ast.debug_dump(), Err(ref reason) => println!("\terr: {:?}", reason), } } } } #[allow(clippy::ptr_arg)] fn maybe_dump_rpn(rpn: &TokenStack) { use std::env; if let Ok(debug_var) = env::var("EXPR_DEBUG_RPN") { if debug_var == "1" { println!("EXPR_DEBUG_RPN"); for token in rpn { println!("\t{:?}", token); } } } } fn ast_from_rpn(rpn: &mut TokenStack) -> Result, String> { match rpn.pop() { None => Err("syntax error (premature end of expression)".to_owned()), Some((token_idx, Token::Value { value })) => Ok(ASTNode::new_leaf(token_idx, &value)), Some((token_idx, Token::InfixOp { value, .. })) => { maybe_ast_node(token_idx, &value, 2, rpn) } Some((token_idx, Token::PrefixOp { value, arity })) => { maybe_ast_node(token_idx, &value, arity, rpn) } Some((token_idx, unexpected_token)) => { panic!("unexpected token at #{} {:?}", token_idx, unexpected_token) } } } fn maybe_ast_node( token_idx: usize, op_type: &str, arity: usize, rpn: &mut TokenStack, ) -> Result, String> { let mut operands = Vec::with_capacity(arity); for _ in 0..arity { match ast_from_rpn(rpn) { Err(reason) => return Err(reason), Ok(operand) => operands.push(operand), } } operands.reverse(); Ok(ASTNode::new_node(token_idx, op_type, operands)) } fn move_rest_of_ops_to_out( out_stack: &mut TokenStack, op_stack: &mut TokenStack, ) -> Result<(), String> { loop { match op_stack.pop() { None => return Ok(()), Some((token_idx, Token::ParOpen)) => { return Err(format!( "syntax error (Mismatched open-parenthesis at #{})", token_idx )) } Some((token_idx, Token::ParClose)) => { return Err(format!( "syntax error (Mismatched close-parenthesis at #{})", token_idx )) } Some(other) => out_stack.push(other), } } } fn push_token_to_either_stack( token_idx: usize, token: &Token, out_stack: &mut TokenStack, op_stack: &mut TokenStack, ) -> Result<(), String> { let result = match *token { Token::Value { .. } => { out_stack.push((token_idx, token.clone())); Ok(()) } Token::InfixOp { .. } => { if op_stack.is_empty() { op_stack.push((token_idx, token.clone())); Ok(()) } else { push_op_to_stack(token_idx, token, out_stack, op_stack) } } Token::PrefixOp { .. } => { op_stack.push((token_idx, token.clone())); Ok(()) } Token::ParOpen => { op_stack.push((token_idx, token.clone())); Ok(()) } Token::ParClose => move_till_match_paren(out_stack, op_stack), }; maybe_dump_shunting_yard_step(token_idx, token, out_stack, op_stack, &result); result } #[allow(clippy::ptr_arg)] fn maybe_dump_shunting_yard_step( token_idx: usize, token: &Token, out_stack: &TokenStack, op_stack: &TokenStack, result: &Result<(), String>, ) { use std::env; if let Ok(debug_var) = env::var("EXPR_DEBUG_SYA_STEP") { if debug_var == "1" { println!("EXPR_DEBUG_SYA_STEP"); println!("\t{} => {:?}", token_idx, token); println!("\t\tout: {:?}", out_stack); println!("\t\top : {:?}", op_stack); println!("\t\tresult: {:?}", result); } } } fn push_op_to_stack( token_idx: usize, token: &Token, out_stack: &mut TokenStack, op_stack: &mut TokenStack, ) -> Result<(), String> { if let Token::InfixOp { precedence: prec, left_assoc: la, .. } = *token { loop { match op_stack.last() { None => { op_stack.push((token_idx, token.clone())); return Ok(()); } Some(&(_, Token::ParOpen)) => { op_stack.push((token_idx, token.clone())); return Ok(()); } Some(&( _, Token::InfixOp { precedence: prev_prec, .. }, )) => { if la && prev_prec >= prec || !la && prev_prec > prec { out_stack.push(op_stack.pop().unwrap()) } else { op_stack.push((token_idx, token.clone())); return Ok(()); } } Some(&(_, Token::PrefixOp { .. })) => { op_stack.push((token_idx, token.clone())); return Ok(()); } Some(_) => panic!("Non-operator on op_stack"), } } } else { panic!("Expected infix-op") } } fn move_till_match_paren( out_stack: &mut TokenStack, op_stack: &mut TokenStack, ) -> Result<(), String> { loop { match op_stack.pop() { None => return Err("syntax error (Mismatched close-parenthesis)".to_string()), Some((_, Token::ParOpen)) => return Ok(()), Some(other) => out_stack.push(other), } } } fn checked_binop Option, T>(cb: F, op: &str) -> Result { match cb() { Some(v) => Ok(v), None => Err(format!("{}: Numerical result out of range", op)), } } fn infix_operator_two_ints(f: F, values: &[String]) -> Result where F: Fn(i64, i64) -> Result, { assert!(values.len() == 2); if let Ok(left) = values[0].parse::() { if let Ok(right) = values[1].parse::() { return match f(left, right) { Ok(result) => Ok(result.to_string()), Err(reason) => Err(reason), }; } } Err("Expected an integer operand".to_string()) } fn infix_operator_two_ints_or_two_strings( fi: FI, fs: FS, values: &[String], ) -> Result where FI: Fn(i64, i64) -> Result, FS: Fn(&String, &String) -> Result, { assert!(values.len() == 2); if let (Some(a_int), Some(b_int)) = (values[0].parse::().ok(), values[1].parse::().ok()) { match fi(a_int, b_int) { Ok(result) => Ok(result.to_string()), Err(reason) => Err(reason), } } else { fs(&values[0], &values[1]) } } fn infix_operator_or(values: &[String]) -> Result { assert!(values.len() == 2); if value_as_bool(&values[0]) { Ok(values[0].clone()) } else { Ok(values[1].clone()) } } fn infix_operator_and(values: &[String]) -> Result { if value_as_bool(&values[0]) && value_as_bool(&values[1]) { Ok(values[0].clone()) } else { Ok(0.to_string()) } } fn operator_match(values: &[String]) -> Result { assert!(values.len() == 2); let re = match Regex::with_options(&values[1], RegexOptions::REGEX_OPTION_NONE, Syntax::grep()) { Ok(m) => m, Err(err) => return Err(err.description().to_string()), }; if re.captures_len() > 0 { Ok(match re.captures(&values[0]) { Some(captures) => captures.at(1).unwrap().to_string(), None => "".to_string(), }) } else { Ok(match re.find(&values[0]) { Some((start, end)) => (end - start).to_string(), None => "0".to_string(), }) } } fn prefix_operator_length(values: &[String]) -> Result { assert!(values.len() == 1); Ok(values[0].len().to_string()) } fn prefix_operator_index(values: &[String]) -> Result { assert!(values.len() == 2); let haystack = &values[0]; let needles = &values[1]; for (current_idx, ch_h) in haystack.chars().enumerate() { for ch_n in needles.chars() { if ch_n == ch_h { return Ok(current_idx.to_string()); } } } Ok("0".to_string()) } fn prefix_operator_substr(values: &[String]) -> Result { assert!(values.len() == 3); let subj = &values[0]; let mut idx = match values[1].parse::() { Ok(i) => i, Err(_) => return Err("expected integer as POS arg to 'substr'".to_string()), }; let mut len = match values[2].parse::() { Ok(i) => i, Err(_) => return Err("expected integer as LENGTH arg to 'substr'".to_string()), }; if idx <= 0 || len <= 0 { return Ok("".to_string()); } let mut out_str = String::new(); for ch in subj.chars() { idx -= 1; if idx <= 0 { if len <= 0 { break; } len -= 1; out_str.push(ch); } } Ok(out_str) } fn bool_as_int(b: bool) -> i64 { if b { 1 } else { 0 } } fn bool_as_string(b: bool) -> String { if b { "1".to_string() } else { "0".to_string() } } fn value_as_bool(s: &str) -> bool { if s.is_empty() { return false; } match s.parse::() { Ok(n) => n != 0, Err(_) => true, } }