From 4ede121d31f43a787b32fc4c137582638ea305e1 Mon Sep 17 00:00:00 2001 From: Ali Mohammad Pur Date: Fri, 15 Apr 2022 01:50:36 +0430 Subject: [PATCH] Shell: Add support for regex match patterns We previously allowed globs as match pattern, but for more complex matching needs, it's nice to have regular expressions. And as the existing "name a part of the match" concept maps nicely to named capture groups, we can simply reuse the same code and make groups with names available in the match body. --- Userland/Shell/AST.cpp | 126 +++++++++++++++++++++------------ Userland/Shell/AST.h | 3 +- Userland/Shell/Formatter.cpp | 24 +++++-- Userland/Shell/NodeVisitor.cpp | 6 +- Userland/Shell/Parser.cpp | 119 +++++++++++++++++++++++++++---- Userland/Shell/Parser.h | 12 +++- 6 files changed, 219 insertions(+), 71 deletions(-) diff --git a/Userland/Shell/AST.cpp b/Userland/Shell/AST.cpp index 205a79f1c3..f941f88409 100644 --- a/Userland/Shell/AST.cpp +++ b/Userland/Shell/AST.cpp @@ -2117,8 +2117,15 @@ void MatchExpr::dump(int level) const builder.append(')'); } print_indented(builder.string_view(), level + 2); - for (auto& node : entry.options) - node.dump(level + 3); + entry.options.visit( + [&](NonnullRefPtrVector const& options) { + for (auto& option : options) + option.dump(level + 3); + }, + [&](Vector> const& options) { + for (auto& option : options) + print_indented(String::formatted("(regex: {})", option.pattern_value), level + 3); + }); print_indented("(execute)", level + 2); if (entry.body) entry.body->dump(level + 3); @@ -2136,39 +2143,59 @@ RefPtr MatchExpr::run(RefPtr shell) auto list = value->resolve_as_list(shell); auto list_matches = [&](auto&& pattern, auto& spans) { - if (pattern.size() != list.size()) - return false; - - for (size_t i = 0; i < pattern.size(); ++i) { - Vector mask_spans; - if (!list[i].matches(pattern[i], mask_spans)) + if constexpr (IsSame, Regex>) { + if (list.size() != 1) + return false; + auto& subject = list.first(); + auto match = 
pattern.match(subject); + if (!match.success) return false; - for (auto& span : mask_spans) - spans.append(list[i].substring(span.start, span.length)); - } - return true; + spans.ensure_capacity(match.n_capture_groups); + for (size_t i = 0; i < match.n_capture_groups; ++i) { + auto& capture = match.capture_group_matches[0][i]; + spans.append(capture.view.to_string()); + } + return true; + } else { + if (pattern.size() != list.size()) + return false; + + for (size_t i = 0; i < pattern.size(); ++i) { + Vector mask_spans; + if (!list[i].matches(pattern[i], mask_spans)) + return false; + for (auto& span : mask_spans) + spans.append(list[i].substring(span.start, span.length)); + } + + return true; + } }; - auto resolve_pattern = [&](auto& option) { - Vector pattern; - if (option.is_glob()) { - pattern.append(static_cast(&option)->text()); - } else if (option.is_bareword()) { - pattern.append(static_cast(&option)->text()); + auto resolve_pattern = [&](auto& option) -> decltype(auto) { + if constexpr (IsSame, Regex>) { + return option; } else { - auto list = option.run(shell); - if (shell && shell->has_any_error()) - return pattern; + Vector pattern; + if (option.is_glob()) { + pattern.append(static_cast(&option)->text()); + } else if (option.is_bareword()) { + pattern.append(static_cast(&option)->text()); + } else { + auto list = option.run(shell); + if (shell && shell->has_any_error()) + return pattern; - option.for_each_entry(shell, [&](auto&& value) { - pattern.extend(value->resolve_as_list(nullptr)); // Note: 'nullptr' incurs special behavior, - // asking the node for a 'raw' value. - return IterationDecision::Continue; - }); + option.for_each_entry(shell, [&](auto&& value) { + pattern.extend(value->resolve_as_list(nullptr)); // Note: 'nullptr' incurs special behavior, + // asking the node for a 'raw' value. 
+ return IterationDecision::Continue; + }); + } + + return pattern; } - - return pattern; }; auto frame = shell->push_frame(String::formatted("match ({})", this)); @@ -2176,24 +2203,31 @@ RefPtr MatchExpr::run(RefPtr shell) shell->set_local_variable(m_expr_name, value, true); for (auto& entry : m_entries) { - for (auto& option : entry.options) { - Vector spans; - if (list_matches(resolve_pattern(option), spans)) { - if (entry.body) { - if (entry.match_names.has_value()) { - size_t i = 0; - for (auto& name : entry.match_names.value()) { - if (spans.size() > i) - shell->set_local_variable(name, make_ref_counted(spans[i]), true); - ++i; + auto result = entry.options.visit([&](auto& options) -> Variant> { + for (auto& option : options) { + Vector spans; + if (list_matches(resolve_pattern(option), spans)) { + if (entry.body) { + if (entry.match_names.has_value()) { + size_t i = 0; + for (auto& name : entry.match_names.value()) { + if (spans.size() > i) + shell->set_local_variable(name, make_ref_counted(spans[i]), true); + ++i; + } } + return entry.body->run(shell); } - return entry.body->run(shell); - } else { - return make_ref_counted({}); + return RefPtr(make_ref_counted({})); } } - } + return IterationDecision::Continue; + }); + if (result.has() && result.get() == IterationDecision::Break) + break; + + if (result.has>()) + return move(result).get>(); } shell->raise_error(Shell::ShellError::EvaluatedSyntaxError, "Non-exhaustive match rules!", position()); @@ -2211,8 +2245,12 @@ void MatchExpr::highlight_in_editor(Line::Editor& editor, Shell& shell, Highligh for (auto& entry : m_entries) { metadata.is_first_in_list = false; - for (auto& option : entry.options) - option.highlight_in_editor(editor, shell, metadata); + entry.options.visit( + [&](NonnullRefPtrVector& node_options) { + for (auto& option : node_options) + option.highlight_in_editor(editor, shell, metadata); + }, + [](auto&) {}); metadata.is_first_in_list = true; if (entry.body) diff --git 
a/Userland/Shell/AST.h b/Userland/Shell/AST.h index c3290c080d..9bb16dc033 100644 --- a/Userland/Shell/AST.h +++ b/Userland/Shell/AST.h @@ -17,6 +17,7 @@ #include #include #include +#include namespace Shell::AST { @@ -1051,7 +1052,7 @@ private: }; struct MatchEntry { - NonnullRefPtrVector options; + Variant, Vector>> options; Optional> match_names; Optional match_as_position; Vector pipe_positions; diff --git a/Userland/Shell/Formatter.cpp b/Userland/Shell/Formatter.cpp index f005a2c71d..5f3ab9d840 100644 --- a/Userland/Shell/Formatter.cpp +++ b/Userland/Shell/Formatter.cpp @@ -583,12 +583,24 @@ void Formatter::visit(const AST::MatchExpr* node) insert_separator(); first_entry = false; auto first = true; - for (auto& option : entry.options) { - if (!first) - current_builder().append(" | "); - first = false; - option.visit(*this); - } + entry.options.visit( + [&](NonnullRefPtrVector const& patterns) { + for (auto& option : patterns) { + if (!first) + current_builder().append(" | "); + first = false; + option.visit(*this); + } + }, + [&](Vector> const& patterns) { + for (auto& option : patterns) { + if (!first) + current_builder().append(" | "); + first = false; + auto node = make_ref_counted(AST::Position {}, option.pattern_value); + node->visit(*this); + } + }); current_builder().append(' '); if (entry.match_names.has_value() && !entry.match_names.value().is_empty()) { diff --git a/Userland/Shell/NodeVisitor.cpp b/Userland/Shell/NodeVisitor.cpp index 4c3c16d87a..834c78a3f0 100644 --- a/Userland/Shell/NodeVisitor.cpp +++ b/Userland/Shell/NodeVisitor.cpp @@ -141,8 +141,10 @@ void NodeVisitor::visit(const AST::MatchExpr* node) { node->matched_expr()->visit(*this); for (auto& entry : node->entries()) { - for (auto& option : entry.options) - option.visit(*this); + if (auto* ptr = entry.options.get_pointer>()) { + for (auto& option : *ptr) + option.visit(*this); + } if (entry.body) entry.body->visit(*this); } diff --git a/Userland/Shell/Parser.cpp 
b/Userland/Shell/Parser.cpp index 68d189ce04..22826199fb 100644 --- a/Userland/Shell/Parser.cpp +++ b/Userland/Shell/Parser.cpp @@ -84,9 +84,9 @@ bool Parser::expect(StringView expected) } template -NonnullRefPtr Parser::create(Args... args) +NonnullRefPtr Parser::create(Args&&... args) { - return adopt_ref(*new A(AST::Position { m_rule_start_offsets.last(), m_offset, m_rule_start_lines.last(), line() }, args...)); + return adopt_ref(*new A(AST::Position { m_rule_start_offsets.last(), m_offset, m_rule_start_lines.last(), line() }, forward(args)...)); } [[nodiscard]] OwnPtr Parser::push_start() @@ -892,10 +892,10 @@ RefPtr Parser::parse_match_expr() for (;;) { auto entry = parse_match_entry(); consume_while(is_any_of(" \t\n")); - if (entry.options.is_empty()) + if (entry.options.visit([](auto& x) { return x.is_empty(); })) break; - entries.append(entry); + entries.append(move(entry)); } consume_while(is_any_of(" \t\n")); @@ -916,15 +916,32 @@ AST::MatchEntry Parser::parse_match_entry() auto rule_start = push_start(); NonnullRefPtrVector patterns; + Vector> regexps; Vector pipe_positions; Optional> match_names; Optional match_as_position; + enum { + Regex, + Glob, + } pattern_kind; - auto pattern = parse_match_pattern(); - if (!pattern) - return { {}, {}, {}, {}, create("Expected a pattern in 'match' body", true) }; + consume_while(is_any_of(" \t\n")); - patterns.append(pattern.release_nonnull()); + auto regex_pattern = parse_regex_pattern(); + if (regex_pattern.has_value()) { + if (auto error = regex_pattern.value().parser_result.error; error != regex::Error::NoError) + return { NonnullRefPtrVector {}, {}, {}, {}, create(regex::get_error_string(error), false) }; + + pattern_kind = Regex; + regexps.append(regex_pattern.release_value()); + } else { + auto glob_pattern = parse_match_pattern(); + if (!glob_pattern) + return { NonnullRefPtrVector {}, {}, {}, {}, create("Expected a pattern in 'match' body", true) }; + + pattern_kind = Glob; + 
patterns.append(glob_pattern.release_nonnull()); + } consume_while(is_any_of(" \t\n")); @@ -934,14 +951,28 @@ AST::MatchEntry Parser::parse_match_entry() while (expect('|')) { pipe_positions.append({ previous_pipe_start_position, m_offset, previous_pipe_start_line, line() }); consume_while(is_any_of(" \t\n")); - auto pattern = parse_match_pattern(); - if (!pattern) { - error = create("Expected a pattern to follow '|' in 'match' body", true); + switch (pattern_kind) { + case Regex: { + auto pattern = parse_regex_pattern(); + if (!pattern.has_value()) { + error = create("Expected a regex pattern to follow '|' in 'match' body", true); + break; + } + regexps.append(pattern.release_value()); break; } - consume_while(is_any_of(" \t\n")); + case Glob: { + auto pattern = parse_match_pattern(); + if (!pattern) { + error = create("Expected a pattern to follow '|' in 'match' body", true); + break; + } + patterns.append(pattern.release_nonnull()); + break; + } + } - patterns.append(pattern.release_nonnull()); + consume_while(is_any_of(" \t\n")); previous_pipe_start_line = line(); previous_pipe_start_position = m_offset; @@ -951,7 +982,7 @@ AST::MatchEntry Parser::parse_match_entry() auto as_start_position = m_offset; auto as_start_line = line(); - if (expect("as")) { + if (pattern_kind == Glob && expect("as")) { match_as_position = AST::Position { as_start_position, m_offset, as_start_line, line() }; consume_while(is_any_of(" \t\n")); if (!expect('(')) { @@ -975,6 +1006,31 @@ AST::MatchEntry Parser::parse_match_entry() consume_while(is_any_of(" \t\n")); } + if (pattern_kind == Regex) { + Vector names; + for (auto& regex : regexps) { + if (names.is_empty()) { + for (auto& name : regex.parser_result.capture_groups) + names.append(name); + } else { + size_t index = 0; + for (auto& name : regex.parser_result.capture_groups) { + if (names.size() <= index) { + names.append(name); + continue; + } + + if (names[index] != name) { + if (!error) + error = create("Alternative regex 
patterns must have the same capture groups", false); + break; + } + } + } + } + match_names = move(names); + } + if (!expect('{')) { if (!error) error = create("Expected an open brace '{' to start a match entry body", true); @@ -992,7 +1048,10 @@ AST::MatchEntry Parser::parse_match_entry() else if (error) body = error; - return { move(patterns), move(match_names), move(match_as_position), move(pipe_positions), move(body) }; + if (pattern_kind == Glob) + return { move(patterns), move(match_names), move(match_as_position), move(pipe_positions), move(body) }; + + return { move(regexps), move(match_names), move(match_as_position), move(pipe_positions), move(body) }; } RefPtr Parser::parse_match_pattern() @@ -1000,6 +1059,36 @@ RefPtr Parser::parse_match_pattern() return parse_expression(); } +Optional> Parser::parse_regex_pattern() +{ + auto rule_start = push_start(); + + auto start = m_offset; + if (!expect("(?:") && !expect("(?<")) + return {}; + + size_t open_parens = 1; + while (open_parens > 0) { + if (at_end()) + break; + + if (next_is("(")) + ++open_parens; + else if (next_is(")")) + --open_parens; + consume(); + } + + if (open_parens != 0) { + restore_to(*rule_start); + return {}; + } + + auto end = m_offset; + auto pattern = m_input.substring_view(start, end - start); + return Regex(pattern); +} + RefPtr Parser::parse_redirection() { auto rule_start = push_start(); diff --git a/Userland/Shell/Parser.h b/Userland/Shell/Parser.h index ec8ebfc919..41a2a333fe 100644 --- a/Userland/Shell/Parser.h +++ b/Userland/Shell/Parser.h @@ -25,7 +25,7 @@ public: RefPtr parse(); /// Parse the given string *as* an expression - /// that is to forefully enclose it in double-quotes. + /// that is to forcefully enclose it in double-quotes. 
RefPtr parse_as_single_expression(); NonnullRefPtrVector parse_as_multiple_expressions(); @@ -77,6 +77,7 @@ private: RefPtr parse_match_expr(); AST::MatchEntry parse_match_entry(); RefPtr parse_match_pattern(); + Optional> parse_regex_pattern(); RefPtr parse_redirection(); RefPtr parse_list_expression(); RefPtr parse_expression(); @@ -98,7 +99,7 @@ private: bool parse_heredoc_entries(); template - NonnullRefPtr create(Args... args); + NonnullRefPtr create(Args&&... args); void set_end_condition(OwnPtr> condition) { m_end_condition = move(condition); } bool at_end() const @@ -228,10 +229,15 @@ subshell :: '{' toplevel '}' match_expr :: 'match' ws+ expression ws* ('as' ws+ identifier)? '{' match_entry* '}' match_entry :: match_pattern ws* (as identifier_list)? '{' toplevel '}' + | regex_pattern ws* '{' toplevel '}' identifier_list :: '(' (identifier ws*)* ')' -match_pattern :: expression (ws* '|' ws* expression)* +regex_pattern :: regex (ws* '|' ws* regex)* + +match_pattern :: expression (ws* '|' ws* expression)* + +regex :: ('(?:' | '(?<') .* ')' { enclosed string must contain balanced parentheses } command :: redirection command | list_expression command?