echo: Support octal, hexadecimal and unicode escape sequences

2025-07-25 04:57:44 +00:00 · 2021-08-25 23:57:02 +08:00 · 2021-08-25 23:57:02 +08:00 · c9b384da92
commit c9b384da92
parent c2f62a03ff
2 changed files with 117 additions and 47 deletions
--- a/Userland/Utilities/echo.cpp
+++ b/Userland/Utilities/echo.cpp
@@ -4,58 +4,92 @@
 * SPDX-License-Identifier: BSD-2-Clause
 */

-#include <AK/String.h>
+#include <AK/CharacterTypes.h>
+#include <AK/GenericLexer.h>
 #include <LibCore/ArgsParser.h>
 #include <stdio.h>
 #include <unistd.h>

-static char backslash_escaped_char(char c)
+static u8 parse_octal_number(GenericLexer& lexer) // Reads up to three octal digits from `lexer`; returns their value saturated to 255.
 {
-    switch (c) {
-    case '\\':
-        return c;
-    // `\"` produces `"` with printf(1), but `\"` with echo(1)
-    case 'a':
-        return '\a';
-    case 'b':
-        return '\b';
-    case 'e':
-        return '\e';
-    case 'f':
-        return '\f';
-    case 'n':
-        return '\n';
-    case 'r':
-        return '\r';
-    case 't':
-        return '\t';
-    case 'v':
-        return '\v';
-    default:
-        return c;
+    u32 value = 0; // Wider than u8 on purpose: three octal digits can reach 0777 (511).
+    for (size_t count = 0; count < 3; ++count) {
+        auto c = lexer.peek();
+        if (!(c >= '0' && c <= '7'))
+            break; // First non-octal character ends the number and is left unconsumed.
+        value = value * 8 + (c - '0');
+        lexer.consume();
    }
+    value = clamp(value, 0, 255); // Store the clamped result; a bare clamp() call is a no-op and the u8 return would wrap.
+    return value;
 }

-static String interpret_backslash_escapes(String s)
+static Optional<u8> parse_hex_number(GenericLexer& lexer) // Parses exactly two hex digits from `lexer` into one byte; empty Optional otherwise.
 {
+    u8 value = 0;
+    for (size_t count = 0; count < 2; ++count) {
+        auto c = lexer.peek();
+        if (!is_ascii_hex_digit(c))
+            return {}; // A digit consumed on the previous iteration stays consumed; the lexer is not rewound here.
+        value = value * 16 + parse_ascii_hex_digit(c);
+        lexer.consume();
+    }
+    return value;
+}
+
+static String interpret_backslash_escapes(StringView string, bool& no_trailing_newline)
+{
+    static constexpr auto escape_map = "a\ab\be\ef\fn\nr\rt\tv\v"sv;
+    static constexpr auto unescaped_chars = "\a\b\e\f\n\r\t\v\\"sv;
+
    StringBuilder builder;
+    GenericLexer lexer { string };

-    for (size_t i = 0; i < s.length();) {
-        if (char c = s[i++]; c != '\\') {
-            builder.append(c);
-            continue;
+    while (!lexer.is_eof()) {
+        auto this_index = lexer.tell();
+        auto this_char = lexer.consume();
+        if (this_char == '\\') {
+            if (lexer.is_eof()) {
+                builder.append('\\');
+                break;
+            }
+            auto next_char = lexer.peek();
+            if (next_char == 'c') {
+                no_trailing_newline = true;
+                break;
+            }
+            if (next_char == '0') {
+                lexer.consume();
+                auto octal_number = parse_octal_number(lexer);
+                builder.append(octal_number);
+            } else if (next_char == 'x') {
+                lexer.consume();
+                auto maybe_hex_number = parse_hex_number(lexer);
+                if (!maybe_hex_number.has_value()) {
+                    auto bad_substring = string.substring_view(this_index, lexer.tell() - this_index);
+                    builder.append(bad_substring);
+                } else {
+                    builder.append(maybe_hex_number.release_value());
+                }
+            } else if (next_char == 'u') {
+                lexer.retreat();
+                auto maybe_code_point = lexer.consume_escaped_code_point();
+                if (maybe_code_point.is_error()) {
+                    auto bad_substring = string.substring_view(this_index, lexer.tell() - this_index);
+                    builder.append(bad_substring);
+                } else {
+                    builder.append_code_point(maybe_code_point.release_value());
+                }
+            } else {
+                lexer.retreat();
+                auto consumed_char = lexer.consume_escaped_character('\\', escape_map);
+                if (!unescaped_chars.contains(consumed_char))
+                    builder.append('\\');
+                builder.append(consumed_char);
+            }
+        } else {
+            builder.append(this_char);
        }
-        if (i == s.length()) {
-            // Last character of string is '\' -- output it verbatim.
-            builder.append('\\');
-        }
-
-        char c = s[i++];
-        if (c == 'c') // `\c` suppresses further output.
-            break;
-        // FIXME: \0ooo, \xHH, \uHHHH, \UHHHHHHHH should produce characters if followed by
-        // enough digits.
-        builder.append(backslash_escaped_char(c));
    }

    return builder.build();
@@ -68,19 +102,25 @@ int main(int argc, char** argv)
        return 1;
    }

-    Vector<const char*> values;
+    Vector<const char*> text;
    bool no_trailing_newline = false;
    bool should_interpret_backslash_escapes = false;

    Core::ArgsParser args_parser;
    args_parser.add_option(no_trailing_newline, "Do not output a trailing newline", nullptr, 'n');
    args_parser.add_option(should_interpret_backslash_escapes, "Interpret backslash escapes", nullptr, 'e');
-    args_parser.add_positional_argument(values, "Values to print out", "string", Core::ArgsParser::Required::No);
+    args_parser.add_positional_argument(text, "Text to print out", "text", Core::ArgsParser::Required::No);
+    args_parser.set_stop_on_first_non_option(true);
    args_parser.parse(argc, argv);

-    String output = String::join(' ', values);
+    if (text.is_empty()) {
+        outln();
+        return 0;
+    }
+
+    auto output = String::join(' ', text);
    if (should_interpret_backslash_escapes)
-        output = interpret_backslash_escapes(move(output));
+        output = interpret_backslash_escapes(output, no_trailing_newline);
    out("{}", output);
    if (!no_trailing_newline)
        outln();