From d821719c67a6f3a6f7d67d4f1c04527451dd7bb1 Mon Sep 17 00:00:00 2001
From: Michael Debertol <michael.debertol@gmail.com>
Date: Sat, 29 May 2021 23:25:23 +0200
Subject: [PATCH] expr: support arbitrary precision integers (#2271)

* expr: support arbitrary precision integers

Instead of i64s we now use BigInts for integer operations. This means
that no result or input can be out of range.
The representation of integer flags was changed from i64 to u8 to make
their intention clearer.

* expr: allow big numbers as arguments as well

Also adds some tests

* expr: use num-traits to check bigints for 0 and 1

* expr: remove obsolete refs

match ergonomics made these avoidable.

* formatting

Co-authored-by: Sylvestre Ledru <sylvestre@debian.org>
---
 Cargo.lock                     |  13 ++++
 src/uu/expr/Cargo.toml         |   2 +
 src/uu/expr/src/syntax_tree.rs | 107 +++++++++++++++------------------
 src/uu/expr/src/tokens.rs      |  12 ++--
 tests/by-util/test_expr.rs     |  78 ++++++++++++++++++------
 5 files changed, 128 insertions(+), 84 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 6d2a47c84..997e1f458 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -859,6 +859,17 @@ version = "0.1.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb"
 
+[[package]]
+name = "num-bigint"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4e0d047c1062aa51e256408c560894e5251f08925980e53cf1aa5bd00eec6512"
+dependencies = [
+ "autocfg",
+ "num-integer",
+ "num-traits",
+]
+
 [[package]]
 name = "num-integer"
 version = "0.1.44"
@@ -1812,6 +1823,8 @@ name = "uu_expr"
 version = "0.0.6"
 dependencies = [
  "libc",
+ "num-bigint",
+ "num-traits",
  "onig",
  "uucore",
  "uucore_procs",
diff --git a/src/uu/expr/Cargo.toml b/src/uu/expr/Cargo.toml
index c535df7ce..ed992bf71 100644
--- a/src/uu/expr/Cargo.toml
+++ b/src/uu/expr/Cargo.toml
@@ -16,6 +16,8 @@ path = "src/expr.rs"
 
 [dependencies]
 libc = "0.2.42"
+num-bigint = "0.4.0"
+num-traits = "0.2.14"
 onig = "~4.3.2"
 uucore = { version=">=0.0.8", package="uucore", path="../../uucore" }
 uucore_procs = { version=">=0.0.5", package="uucore_procs", path="../../uucore_procs" }
diff --git a/src/uu/expr/src/syntax_tree.rs b/src/uu/expr/src/syntax_tree.rs
index a75f4c742..b72d78729 100644
--- a/src/uu/expr/src/syntax_tree.rs
+++ b/src/uu/expr/src/syntax_tree.rs
@@ -12,6 +12,8 @@
 
 // spell-checker:ignore (ToDO) binop binops ints paren prec
 
+use num_bigint::BigInt;
+use num_traits::{One, Zero};
 use onig::{Regex, RegexOptions, Syntax};
 
 use crate::tokens::Token;
@@ -39,20 +41,17 @@ impl AstNode {
         for _ in 0..depth {
             print!("\t",);
         }
-        match *self {
-            AstNode::Leaf {
-                ref token_idx,
-                ref value,
-            } => println!(
+        match self {
+            AstNode::Leaf { token_idx, value } => println!(
                 "Leaf( {} ) at #{} ( evaluate -> {:?} )",
                 value,
                 token_idx,
                 self.evaluate()
             ),
             AstNode::Node {
-                ref token_idx,
-                ref op_type,
-                ref operands,
+                token_idx,
+                op_type,
+                operands,
             } => {
                 println!(
                     "Node( {} ) at #{} (evaluate -> {:?})",
@@ -81,36 +80,33 @@ impl AstNode {
         })
     }
     pub fn evaluate(&self) -> Result<String, String> {
-        match *self {
-            AstNode::Leaf { ref value, .. } => Ok(value.clone()),
-            AstNode::Node { ref op_type, .. } => match self.operand_values() {
+        match self {
+            AstNode::Leaf { value, .. } => Ok(value.clone()),
+            AstNode::Node { op_type, .. } => match self.operand_values() {
                 Err(reason) => Err(reason),
                 Ok(operand_values) => match op_type.as_ref() {
-                    "+" => infix_operator_two_ints(
-                        |a: i64, b: i64| checked_binop(|| a.checked_add(b), "+"),
-                        &operand_values,
-                    ),
-                    "-" => infix_operator_two_ints(
-                        |a: i64, b: i64| checked_binop(|| a.checked_sub(b), "-"),
-                        &operand_values,
-                    ),
-                    "*" => infix_operator_two_ints(
-                        |a: i64, b: i64| checked_binop(|| a.checked_mul(b), "*"),
-                        &operand_values,
-                    ),
+                    "+" => {
+                        infix_operator_two_ints(|a: BigInt, b: BigInt| Ok(a + b), &operand_values)
+                    }
+                    "-" => {
+                        infix_operator_two_ints(|a: BigInt, b: BigInt| Ok(a - b), &operand_values)
+                    }
+                    "*" => {
+                        infix_operator_two_ints(|a: BigInt, b: BigInt| Ok(a * b), &operand_values)
+                    }
                     "/" => infix_operator_two_ints(
-                        |a: i64, b: i64| {
-                            if b == 0 {
+                        |a: BigInt, b: BigInt| {
+                            if b.is_zero() {
                                 Err("division by zero".to_owned())
                             } else {
-                                checked_binop(|| a.checked_div(b), "/")
+                                Ok(a / b)
                             }
                         },
                         &operand_values,
                     ),
                     "%" => infix_operator_two_ints(
-                        |a: i64, b: i64| {
-                            if b == 0 {
+                        |a: BigInt, b: BigInt| {
+                            if b.is_zero() {
                                 Err("division by zero".to_owned())
                             } else {
                                 Ok(a % b)
@@ -119,32 +115,32 @@ impl AstNode {
                         &operand_values,
                     ),
                     "=" => infix_operator_two_ints_or_two_strings(
-                        |a: i64, b: i64| Ok(bool_as_int(a == b)),
+                        |a: BigInt, b: BigInt| Ok(bool_as_int(a == b)),
                         |a: &String, b: &String| Ok(bool_as_string(a == b)),
                         &operand_values,
                     ),
                     "!=" => infix_operator_two_ints_or_two_strings(
-                        |a: i64, b: i64| Ok(bool_as_int(a != b)),
+                        |a: BigInt, b: BigInt| Ok(bool_as_int(a != b)),
                         |a: &String, b: &String| Ok(bool_as_string(a != b)),
                         &operand_values,
                     ),
                     "<" => infix_operator_two_ints_or_two_strings(
-                        |a: i64, b: i64| Ok(bool_as_int(a < b)),
+                        |a: BigInt, b: BigInt| Ok(bool_as_int(a < b)),
                         |a: &String, b: &String| Ok(bool_as_string(a < b)),
                         &operand_values,
                     ),
                     ">" => infix_operator_two_ints_or_two_strings(
-                        |a: i64, b: i64| Ok(bool_as_int(a > b)),
+                        |a: BigInt, b: BigInt| Ok(bool_as_int(a > b)),
                         |a: &String, b: &String| Ok(bool_as_string(a > b)),
                         &operand_values,
                     ),
                     "<=" => infix_operator_two_ints_or_two_strings(
-                        |a: i64, b: i64| Ok(bool_as_int(a <= b)),
+                        |a: BigInt, b: BigInt| Ok(bool_as_int(a <= b)),
                         |a: &String, b: &String| Ok(bool_as_string(a <= b)),
                         &operand_values,
                     ),
                     ">=" => infix_operator_two_ints_or_two_strings(
-                        |a: i64, b: i64| Ok(bool_as_int(a >= b)),
+                        |a: BigInt, b: BigInt| Ok(bool_as_int(a >= b)),
                         |a: &String, b: &String| Ok(bool_as_string(a >= b)),
                         &operand_values,
                     ),
@@ -161,7 +157,7 @@ impl AstNode {
         }
     }
     pub fn operand_values(&self) -> Result<Vec<String>, String> {
-        if let AstNode::Node { ref operands, .. } = *self {
+        if let AstNode::Node { operands, .. } = self {
             let mut out = Vec::with_capacity(operands.len());
             for operand in operands {
                 match operand.evaluate() {
@@ -217,9 +213,9 @@ fn maybe_dump_ast(result: &Result<Box<AstNode>, String>) {
     if let Ok(debug_var) = env::var("EXPR_DEBUG_AST") {
         if debug_var == "1" {
             println!("EXPR_DEBUG_AST");
-            match *result {
-                Ok(ref ast) => ast.debug_dump(),
-                Err(ref reason) => println!("\terr: {:?}", reason),
+            match result {
+                Ok(ast) => ast.debug_dump(),
+                Err(reason) => println!("\terr: {:?}", reason),
             }
         }
     }
@@ -304,7 +300,7 @@ fn push_token_to_either_stack(
     out_stack: &mut TokenStack,
     op_stack: &mut TokenStack,
 ) -> Result<(), String> {
-    let result = match *token {
+    let result = match token {
         Token::Value { .. } => {
             out_stack.push((token_idx, token.clone()));
             Ok(())
@@ -420,24 +416,14 @@ fn move_till_match_paren(
     }
 }
 
-fn checked_binop<F: Fn() -> Option<T>, T>(cb: F, op: &str) -> Result<T, String> {
-    match cb() {
-        Some(v) => Ok(v),
-        None => Err(format!("{}: Numerical result out of range", op)),
-    }
-}
-
 fn infix_operator_two_ints<F>(f: F, values: &[String]) -> Result<String, String>
 where
-    F: Fn(i64, i64) -> Result<i64, String>,
+    F: Fn(BigInt, BigInt) -> Result<BigInt, String>,
 {
     assert!(values.len() == 2);
-    if let Ok(left) = values[0].parse::<i64>() {
-        if let Ok(right) = values[1].parse::<i64>() {
-            return match f(left, right) {
-                Ok(result) => Ok(result.to_string()),
-                Err(reason) => Err(reason),
-            };
+    if let Ok(left) = values[0].parse::<BigInt>() {
+        if let Ok(right) = values[1].parse::<BigInt>() {
+            return f(left, right).map(|big_int| big_int.to_string());
         }
     }
     Err("Expected an integer operand".to_string())
@@ -449,13 +435,14 @@ fn infix_operator_two_ints_or_two_strings<FI, FS>(
     values: &[String],
 ) -> Result<String, String>
 where
-    FI: Fn(i64, i64) -> Result<i64, String>,
+    FI: Fn(BigInt, BigInt) -> Result<u8, String>,
     FS: Fn(&String, &String) -> Result<String, String>,
 {
     assert!(values.len() == 2);
-    if let (Some(a_int), Some(b_int)) =
-        (values[0].parse::<i64>().ok(), values[1].parse::<i64>().ok())
-    {
+    if let (Some(a_int), Some(b_int)) = (
+        values[0].parse::<BigInt>().ok(),
+        values[1].parse::<BigInt>().ok(),
+    ) {
         match fi(a_int, b_int) {
             Ok(result) => Ok(result.to_string()),
             Err(reason) => Err(reason),
@@ -541,7 +528,7 @@ fn prefix_operator_substr(values: &[String]) -> String {
     subj.chars().skip(idx).take(len).collect()
 }
 
-fn bool_as_int(b: bool) -> i64 {
+fn bool_as_int(b: bool) -> u8 {
     if b {
         1
     } else {
@@ -559,8 +546,8 @@ fn value_as_bool(s: &str) -> bool {
     if s.is_empty() {
         return false;
     }
-    match s.parse::<i64>() {
-        Ok(n) => n != 0,
+    match s.parse::<BigInt>() {
+        Ok(n) => !n.is_zero(), // every non-zero integer is truthy, not only 1
         Err(_) => true,
     }
 }
diff --git a/src/uu/expr/src/tokens.rs b/src/uu/expr/src/tokens.rs
index 6056e4ba1..6f2795588 100644
--- a/src/uu/expr/src/tokens.rs
+++ b/src/uu/expr/src/tokens.rs
@@ -18,6 +18,8 @@
 
 // spell-checker:ignore (ToDO) paren
 
+use num_bigint::BigInt;
+
 #[derive(Debug, Clone)]
 pub enum Token {
     Value {
@@ -51,14 +53,14 @@ impl Token {
     }
 
     fn is_infix_plus(&self) -> bool {
-        match *self {
-            Token::InfixOp { ref value, .. } => value == "+",
+        match self {
+            Token::InfixOp { value, .. } => value == "+",
             _ => false,
         }
     }
     fn is_a_number(&self) -> bool {
-        match *self {
-            Token::Value { ref value, .. } => value.parse::<i64>().is_ok(),
+        match self {
+            Token::Value { value, .. } => value.parse::<BigInt>().is_ok(),
             _ => false,
         }
     }
@@ -142,7 +144,7 @@ fn push_token_if_not_escaped(acc: &mut Vec<(usize, Token)>, tok_idx: usize, toke
     // Smells heuristics... :(
     let prev_is_plus = match acc.last() {
         None => false,
-        Some(ref t) => t.1.is_infix_plus(),
+        Some(t) => t.1.is_infix_plus(),
     };
     let should_use_as_escaped = if prev_is_plus && acc.len() >= 2 {
         let pre_prev = &acc[acc.len() - 2];
diff --git a/tests/by-util/test_expr.rs b/tests/by-util/test_expr.rs
index f20739e13..30e3016a3 100644
--- a/tests/by-util/test_expr.rs
+++ b/tests/by-util/test_expr.rs
@@ -2,55 +2,95 @@ use crate::common::util::*;
 
 #[test]
 fn test_simple_arithmetic() {
-    new_ucmd!().args(&["1", "+", "1"]).run().stdout_is("2\n");
+    new_ucmd!()
+        .args(&["1", "+", "1"])
+        .succeeds()
+        .stdout_only("2\n");
 
-    new_ucmd!().args(&["1", "-", "1"]).run().stdout_is("0\n");
+    new_ucmd!()
+        .args(&["1", "-", "1"])
+        .fails()
+        .status_code(1)
+        .stdout_only("0\n");
 
-    new_ucmd!().args(&["3", "*", "2"]).run().stdout_is("6\n");
+    new_ucmd!()
+        .args(&["3", "*", "2"])
+        .succeeds()
+        .stdout_only("6\n");
 
-    new_ucmd!().args(&["4", "/", "2"]).run().stdout_is("2\n");
+    new_ucmd!()
+        .args(&["4", "/", "2"])
+        .succeeds()
+        .stdout_only("2\n");
 }
 
 #[test]
 fn test_complex_arithmetic() {
-    let run = new_ucmd!()
+    new_ucmd!()
         .args(&["9223372036854775807", "+", "9223372036854775807"])
-        .run();
-    run.stdout_is("");
-    run.stderr_is("expr: +: Numerical result out of range");
+        .succeeds()
+        .stdout_only("18446744073709551614\n");
 
-    let run = new_ucmd!().args(&["9", "/", "0"]).run();
-    run.stdout_is("");
-    run.stderr_is("expr: division by zero");
+    new_ucmd!()
+        .args(&[
+            "92233720368547758076549841651981984981498415651",
+            "%",
+            "922337203685",
+        ])
+        .succeeds()
+        .stdout_only("533691697086\n");
+
+    new_ucmd!()
+        .args(&[
+            "92233720368547758076549841651981984981498415651",
+            "*",
+            "922337203685",
+        ])
+        .succeeds()
+        .stdout_only("85070591730190566808700855121818604965830915152801178873935\n");
+
+    new_ucmd!()
+        .args(&[
+            "92233720368547758076549841651981984981498415651",
+            "-",
+            "922337203685",
+        ])
+        .succeeds()
+        .stdout_only("92233720368547758076549841651981984059161211966\n");
+
+    new_ucmd!()
+        .args(&["9", "/", "0"])
+        .fails()
+        .stderr_only("expr: division by zero\n");
 }
 
 #[test]
 fn test_parenthesis() {
     new_ucmd!()
         .args(&["(", "1", "+", "1", ")", "*", "2"])
-        .run()
-        .stdout_is("4\n");
+        .succeeds()
+        .stdout_only("4\n");
 }
 
 #[test]
 fn test_or() {
     new_ucmd!()
         .args(&["0", "|", "foo"])
-        .run()
-        .stdout_is("foo\n");
+        .succeeds()
+        .stdout_only("foo\n");
 
     new_ucmd!()
         .args(&["foo", "|", "bar"])
-        .run()
-        .stdout_is("foo\n");
+        .succeeds()
+        .stdout_only("foo\n");
 }
 
 #[test]
 fn test_and() {
     new_ucmd!()
         .args(&["foo", "&", "1"])
-        .run()
-        .stdout_is("foo\n");
+        .succeeds()
+        .stdout_only("foo\n");
 
     new_ucmd!().args(&["", "&", "1"]).run().stdout_is("0\n");
 }