1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-26 08:57:34 +00:00

Shell: Add support for regex match patterns

We previously allowed globs as match patterns, but for more complex
matching needs, it's nice to have regular expressions.
Since the existing "name a part of the match" concept maps nicely onto
named capture groups, we can simply reuse the same code and make the
named groups available in the match body.
This commit is contained in:
Ali Mohammad Pur 2022-04-15 01:50:36 +04:30 committed by Ali Mohammad Pur
parent 6aceec4535
commit 4ede121d31
6 changed files with 219 additions and 71 deletions

View file

@ -2117,8 +2117,15 @@ void MatchExpr::dump(int level) const
builder.append(')'); builder.append(')');
} }
print_indented(builder.string_view(), level + 2); print_indented(builder.string_view(), level + 2);
for (auto& node : entry.options) entry.options.visit(
node.dump(level + 3); [&](NonnullRefPtrVector<Node> const& options) {
for (auto& option : options)
option.dump(level + 3);
},
[&](Vector<Regex<ECMA262>> const& options) {
for (auto& option : options)
print_indented(String::formatted("(regex: {})", option.pattern_value), level + 3);
});
print_indented("(execute)", level + 2); print_indented("(execute)", level + 2);
if (entry.body) if (entry.body)
entry.body->dump(level + 3); entry.body->dump(level + 3);
@ -2136,39 +2143,59 @@ RefPtr<Value> MatchExpr::run(RefPtr<Shell> shell)
auto list = value->resolve_as_list(shell); auto list = value->resolve_as_list(shell);
auto list_matches = [&](auto&& pattern, auto& spans) { auto list_matches = [&](auto&& pattern, auto& spans) {
if (pattern.size() != list.size()) if constexpr (IsSame<RemoveCVReference<decltype(pattern)>, Regex<ECMA262>>) {
return false; if (list.size() != 1)
return false;
for (size_t i = 0; i < pattern.size(); ++i) { auto& subject = list.first();
Vector<AK::MaskSpan> mask_spans; auto match = pattern.match(subject);
if (!list[i].matches(pattern[i], mask_spans)) if (!match.success)
return false; return false;
for (auto& span : mask_spans)
spans.append(list[i].substring(span.start, span.length));
}
return true; spans.ensure_capacity(match.n_capture_groups);
for (size_t i = 0; i < match.n_capture_groups; ++i) {
auto& capture = match.capture_group_matches[0][i];
spans.append(capture.view.to_string());
}
return true;
} else {
if (pattern.size() != list.size())
return false;
for (size_t i = 0; i < pattern.size(); ++i) {
Vector<AK::MaskSpan> mask_spans;
if (!list[i].matches(pattern[i], mask_spans))
return false;
for (auto& span : mask_spans)
spans.append(list[i].substring(span.start, span.length));
}
return true;
}
}; };
auto resolve_pattern = [&](auto& option) { auto resolve_pattern = [&](auto& option) -> decltype(auto) {
Vector<String> pattern; if constexpr (IsSame<RemoveCVReference<decltype(option)>, Regex<ECMA262>>) {
if (option.is_glob()) { return option;
pattern.append(static_cast<const Glob*>(&option)->text());
} else if (option.is_bareword()) {
pattern.append(static_cast<const BarewordLiteral*>(&option)->text());
} else { } else {
auto list = option.run(shell); Vector<String> pattern;
if (shell && shell->has_any_error()) if (option.is_glob()) {
return pattern; pattern.append(static_cast<const Glob*>(&option)->text());
} else if (option.is_bareword()) {
pattern.append(static_cast<const BarewordLiteral*>(&option)->text());
} else {
auto list = option.run(shell);
if (shell && shell->has_any_error())
return pattern;
option.for_each_entry(shell, [&](auto&& value) { option.for_each_entry(shell, [&](auto&& value) {
pattern.extend(value->resolve_as_list(nullptr)); // Note: 'nullptr' incurs special behavior, pattern.extend(value->resolve_as_list(nullptr)); // Note: 'nullptr' incurs special behavior,
// asking the node for a 'raw' value. // asking the node for a 'raw' value.
return IterationDecision::Continue; return IterationDecision::Continue;
}); });
}
return pattern;
} }
return pattern;
}; };
auto frame = shell->push_frame(String::formatted("match ({})", this)); auto frame = shell->push_frame(String::formatted("match ({})", this));
@ -2176,24 +2203,31 @@ RefPtr<Value> MatchExpr::run(RefPtr<Shell> shell)
shell->set_local_variable(m_expr_name, value, true); shell->set_local_variable(m_expr_name, value, true);
for (auto& entry : m_entries) { for (auto& entry : m_entries) {
for (auto& option : entry.options) { auto result = entry.options.visit([&](auto& options) -> Variant<IterationDecision, RefPtr<Value>> {
Vector<String> spans; for (auto& option : options) {
if (list_matches(resolve_pattern(option), spans)) { Vector<String> spans;
if (entry.body) { if (list_matches(resolve_pattern(option), spans)) {
if (entry.match_names.has_value()) { if (entry.body) {
size_t i = 0; if (entry.match_names.has_value()) {
for (auto& name : entry.match_names.value()) { size_t i = 0;
if (spans.size() > i) for (auto& name : entry.match_names.value()) {
shell->set_local_variable(name, make_ref_counted<AST::StringValue>(spans[i]), true); if (spans.size() > i)
++i; shell->set_local_variable(name, make_ref_counted<AST::StringValue>(spans[i]), true);
++i;
}
} }
return entry.body->run(shell);
} }
return entry.body->run(shell); return RefPtr<Value>(make_ref_counted<AST::ListValue>({}));
} else {
return make_ref_counted<AST::ListValue>({});
} }
} }
} return IterationDecision::Continue;
});
if (result.has<IterationDecision>() && result.get<IterationDecision>() == IterationDecision::Break)
break;
if (result.has<RefPtr<Value>>())
return move(result).get<RefPtr<Value>>();
} }
shell->raise_error(Shell::ShellError::EvaluatedSyntaxError, "Non-exhaustive match rules!", position()); shell->raise_error(Shell::ShellError::EvaluatedSyntaxError, "Non-exhaustive match rules!", position());
@ -2211,8 +2245,12 @@ void MatchExpr::highlight_in_editor(Line::Editor& editor, Shell& shell, Highligh
for (auto& entry : m_entries) { for (auto& entry : m_entries) {
metadata.is_first_in_list = false; metadata.is_first_in_list = false;
for (auto& option : entry.options) entry.options.visit(
option.highlight_in_editor(editor, shell, metadata); [&](NonnullRefPtrVector<Node>& node_options) {
for (auto& option : node_options)
option.highlight_in_editor(editor, shell, metadata);
},
[](auto&) {});
metadata.is_first_in_list = true; metadata.is_first_in_list = true;
if (entry.body) if (entry.body)

View file

@ -17,6 +17,7 @@
#include <AK/Types.h> #include <AK/Types.h>
#include <AK/Vector.h> #include <AK/Vector.h>
#include <LibLine/Editor.h> #include <LibLine/Editor.h>
#include <LibRegex/Regex.h>
namespace Shell::AST { namespace Shell::AST {
@ -1051,7 +1052,7 @@ private:
}; };
struct MatchEntry { struct MatchEntry {
NonnullRefPtrVector<Node> options; Variant<NonnullRefPtrVector<Node>, Vector<Regex<ECMA262>>> options;
Optional<Vector<String>> match_names; Optional<Vector<String>> match_names;
Optional<Position> match_as_position; Optional<Position> match_as_position;
Vector<Position> pipe_positions; Vector<Position> pipe_positions;

View file

@ -583,12 +583,24 @@ void Formatter::visit(const AST::MatchExpr* node)
insert_separator(); insert_separator();
first_entry = false; first_entry = false;
auto first = true; auto first = true;
for (auto& option : entry.options) { entry.options.visit(
if (!first) [&](NonnullRefPtrVector<AST::Node> const& patterns) {
current_builder().append(" | "); for (auto& option : patterns) {
first = false; if (!first)
option.visit(*this); current_builder().append(" | ");
} first = false;
option.visit(*this);
}
},
[&](Vector<Regex<ECMA262>> const& patterns) {
for (auto& option : patterns) {
if (!first)
current_builder().append(" | ");
first = false;
auto node = make_ref_counted<AST::BarewordLiteral>(AST::Position {}, option.pattern_value);
node->visit(*this);
}
});
current_builder().append(' '); current_builder().append(' ');
if (entry.match_names.has_value() && !entry.match_names.value().is_empty()) { if (entry.match_names.has_value() && !entry.match_names.value().is_empty()) {

View file

@ -141,8 +141,10 @@ void NodeVisitor::visit(const AST::MatchExpr* node)
{ {
node->matched_expr()->visit(*this); node->matched_expr()->visit(*this);
for (auto& entry : node->entries()) { for (auto& entry : node->entries()) {
for (auto& option : entry.options) if (auto* ptr = entry.options.get_pointer<NonnullRefPtrVector<Node>>()) {
option.visit(*this); for (auto& option : *ptr)
option.visit(*this);
}
if (entry.body) if (entry.body)
entry.body->visit(*this); entry.body->visit(*this);
} }

View file

@ -84,9 +84,9 @@ bool Parser::expect(StringView expected)
} }
template<typename A, typename... Args> template<typename A, typename... Args>
NonnullRefPtr<A> Parser::create(Args... args) NonnullRefPtr<A> Parser::create(Args&&... args)
{ {
return adopt_ref(*new A(AST::Position { m_rule_start_offsets.last(), m_offset, m_rule_start_lines.last(), line() }, args...)); return adopt_ref(*new A(AST::Position { m_rule_start_offsets.last(), m_offset, m_rule_start_lines.last(), line() }, forward<Args>(args)...));
} }
[[nodiscard]] OwnPtr<Parser::ScopedOffset> Parser::push_start() [[nodiscard]] OwnPtr<Parser::ScopedOffset> Parser::push_start()
@ -892,10 +892,10 @@ RefPtr<AST::Node> Parser::parse_match_expr()
for (;;) { for (;;) {
auto entry = parse_match_entry(); auto entry = parse_match_entry();
consume_while(is_any_of(" \t\n")); consume_while(is_any_of(" \t\n"));
if (entry.options.is_empty()) if (entry.options.visit([](auto& x) { return x.is_empty(); }))
break; break;
entries.append(entry); entries.append(move(entry));
} }
consume_while(is_any_of(" \t\n")); consume_while(is_any_of(" \t\n"));
@ -916,15 +916,32 @@ AST::MatchEntry Parser::parse_match_entry()
auto rule_start = push_start(); auto rule_start = push_start();
NonnullRefPtrVector<AST::Node> patterns; NonnullRefPtrVector<AST::Node> patterns;
Vector<Regex<ECMA262>> regexps;
Vector<AST::Position> pipe_positions; Vector<AST::Position> pipe_positions;
Optional<Vector<String>> match_names; Optional<Vector<String>> match_names;
Optional<AST::Position> match_as_position; Optional<AST::Position> match_as_position;
enum {
Regex,
Glob,
} pattern_kind;
auto pattern = parse_match_pattern(); consume_while(is_any_of(" \t\n"));
if (!pattern)
return { {}, {}, {}, {}, create<AST::SyntaxError>("Expected a pattern in 'match' body", true) };
patterns.append(pattern.release_nonnull()); auto regex_pattern = parse_regex_pattern();
if (regex_pattern.has_value()) {
if (auto error = regex_pattern.value().parser_result.error; error != regex::Error::NoError)
return { NonnullRefPtrVector<AST::Node> {}, {}, {}, {}, create<AST::SyntaxError>(regex::get_error_string(error), false) };
pattern_kind = Regex;
regexps.append(regex_pattern.release_value());
} else {
auto glob_pattern = parse_match_pattern();
if (!glob_pattern)
return { NonnullRefPtrVector<AST::Node> {}, {}, {}, {}, create<AST::SyntaxError>("Expected a pattern in 'match' body", true) };
pattern_kind = Glob;
patterns.append(glob_pattern.release_nonnull());
}
consume_while(is_any_of(" \t\n")); consume_while(is_any_of(" \t\n"));
@ -934,14 +951,28 @@ AST::MatchEntry Parser::parse_match_entry()
while (expect('|')) { while (expect('|')) {
pipe_positions.append({ previous_pipe_start_position, m_offset, previous_pipe_start_line, line() }); pipe_positions.append({ previous_pipe_start_position, m_offset, previous_pipe_start_line, line() });
consume_while(is_any_of(" \t\n")); consume_while(is_any_of(" \t\n"));
auto pattern = parse_match_pattern(); switch (pattern_kind) {
if (!pattern) { case Regex: {
error = create<AST::SyntaxError>("Expected a pattern to follow '|' in 'match' body", true); auto pattern = parse_regex_pattern();
if (!pattern.has_value()) {
error = create<AST::SyntaxError>("Expected a regex pattern to follow '|' in 'match' body", true);
break;
}
regexps.append(pattern.release_value());
break; break;
} }
consume_while(is_any_of(" \t\n")); case Glob: {
auto pattern = parse_match_pattern();
if (!pattern) {
error = create<AST::SyntaxError>("Expected a pattern to follow '|' in 'match' body", true);
break;
}
patterns.append(pattern.release_nonnull());
break;
}
}
patterns.append(pattern.release_nonnull()); consume_while(is_any_of(" \t\n"));
previous_pipe_start_line = line(); previous_pipe_start_line = line();
previous_pipe_start_position = m_offset; previous_pipe_start_position = m_offset;
@ -951,7 +982,7 @@ AST::MatchEntry Parser::parse_match_entry()
auto as_start_position = m_offset; auto as_start_position = m_offset;
auto as_start_line = line(); auto as_start_line = line();
if (expect("as")) { if (pattern_kind == Glob && expect("as")) {
match_as_position = AST::Position { as_start_position, m_offset, as_start_line, line() }; match_as_position = AST::Position { as_start_position, m_offset, as_start_line, line() };
consume_while(is_any_of(" \t\n")); consume_while(is_any_of(" \t\n"));
if (!expect('(')) { if (!expect('(')) {
@ -975,6 +1006,31 @@ AST::MatchEntry Parser::parse_match_entry()
consume_while(is_any_of(" \t\n")); consume_while(is_any_of(" \t\n"));
} }
if (pattern_kind == Regex) {
Vector<String> names;
for (auto& regex : regexps) {
if (names.is_empty()) {
for (auto& name : regex.parser_result.capture_groups)
names.append(name);
} else {
size_t index = 0;
for (auto& name : regex.parser_result.capture_groups) {
if (names.size() <= index) {
names.append(name);
continue;
}
if (names[index] != name) {
if (!error)
error = create<AST::SyntaxError>("Alternative regex patterns must have the same capture groups", false);
break;
}
}
}
}
match_names = move(names);
}
if (!expect('{')) { if (!expect('{')) {
if (!error) if (!error)
error = create<AST::SyntaxError>("Expected an open brace '{' to start a match entry body", true); error = create<AST::SyntaxError>("Expected an open brace '{' to start a match entry body", true);
@ -992,7 +1048,10 @@ AST::MatchEntry Parser::parse_match_entry()
else if (error) else if (error)
body = error; body = error;
return { move(patterns), move(match_names), move(match_as_position), move(pipe_positions), move(body) }; if (pattern_kind == Glob)
return { move(patterns), move(match_names), move(match_as_position), move(pipe_positions), move(body) };
return { move(regexps), move(match_names), move(match_as_position), move(pipe_positions), move(body) };
} }
RefPtr<AST::Node> Parser::parse_match_pattern() RefPtr<AST::Node> Parser::parse_match_pattern()
@ -1000,6 +1059,36 @@ RefPtr<AST::Node> Parser::parse_match_pattern()
return parse_expression(); return parse_expression();
} }
// Tries to parse a regex match pattern at the current offset.
//
// A regex pattern must begin with "(?:" or "(?<" and extends up to (and
// including) the ')' that closes that initial group. The raw text of the
// whole group, including the opening "(?:" / "(?<", is handed to
// Regex<ECMA262> unchanged.
//
// Returns an empty Optional when the input does not start with one of the two
// prefixes, or when the end of input is reached before the initial group is
// closed (in which case the parser is restored to the start of the rule).
// A returned Regex may still carry a parse error in its parser_result; that
// is left for the caller to inspect.
Optional<Regex<ECMA262>> Parser::parse_regex_pattern()
{
    auto rule_start = push_start();

    auto start = m_offset;
    if (!expect("(?:") && !expect("(?<"))
        return {};

    size_t open_parens = 1;
    // Parentheses inside a character class ("[()]") are literals and must not
    // take part in the balance count.
    bool in_character_class = false;
    while (open_parens > 0) {
        if (at_end())
            break;

        // A backslash escapes the following character, so "\(" and "\)" (and
        // "\[" / "\]") are literals; skip both characters without counting.
        if (next_is("\\")) {
            consume();
            if (at_end())
                break;
            consume();
            continue;
        }

        if (in_character_class) {
            if (next_is("]"))
                in_character_class = false;
        } else if (next_is("[")) {
            in_character_class = true;
        } else if (next_is("(")) {
            ++open_parens;
        } else if (next_is(")")) {
            --open_parens;
        }
        consume();
    }

    // Ran out of input before the initial group was closed: not a regex
    // pattern, so rewind and let the caller try something else.
    if (open_parens != 0) {
        restore_to(*rule_start);
        return {};
    }

    auto end = m_offset;
    auto pattern = m_input.substring_view(start, end - start);
    return Regex<ECMA262>(pattern);
}
RefPtr<AST::Node> Parser::parse_redirection() RefPtr<AST::Node> Parser::parse_redirection()
{ {
auto rule_start = push_start(); auto rule_start = push_start();

View file

@ -25,7 +25,7 @@ public:
RefPtr<AST::Node> parse(); RefPtr<AST::Node> parse();
/// Parse the given string *as* an expression /// Parse the given string *as* an expression
/// that is to forefully enclose it in double-quotes. /// that is to forcefully enclose it in double-quotes.
RefPtr<AST::Node> parse_as_single_expression(); RefPtr<AST::Node> parse_as_single_expression();
NonnullRefPtrVector<AST::Node> parse_as_multiple_expressions(); NonnullRefPtrVector<AST::Node> parse_as_multiple_expressions();
@ -77,6 +77,7 @@ private:
RefPtr<AST::Node> parse_match_expr(); RefPtr<AST::Node> parse_match_expr();
AST::MatchEntry parse_match_entry(); AST::MatchEntry parse_match_entry();
RefPtr<AST::Node> parse_match_pattern(); RefPtr<AST::Node> parse_match_pattern();
Optional<Regex<ECMA262>> parse_regex_pattern();
RefPtr<AST::Node> parse_redirection(); RefPtr<AST::Node> parse_redirection();
RefPtr<AST::Node> parse_list_expression(); RefPtr<AST::Node> parse_list_expression();
RefPtr<AST::Node> parse_expression(); RefPtr<AST::Node> parse_expression();
@ -98,7 +99,7 @@ private:
bool parse_heredoc_entries(); bool parse_heredoc_entries();
template<typename A, typename... Args> template<typename A, typename... Args>
NonnullRefPtr<A> create(Args... args); NonnullRefPtr<A> create(Args&&... args);
void set_end_condition(OwnPtr<Function<bool()>> condition) { m_end_condition = move(condition); } void set_end_condition(OwnPtr<Function<bool()>> condition) { m_end_condition = move(condition); }
bool at_end() const bool at_end() const
@ -228,10 +229,15 @@ subshell :: '{' toplevel '}'
match_expr :: 'match' ws+ expression ws* ('as' ws+ identifier)? '{' match_entry* '}' match_expr :: 'match' ws+ expression ws* ('as' ws+ identifier)? '{' match_entry* '}'
match_entry :: match_pattern ws* (as identifier_list)? '{' toplevel '}' match_entry :: match_pattern ws* (as identifier_list)? '{' toplevel '}'
| regex_pattern ws* '{' toplevel '}'
identifier_list :: '(' (identifier ws*)* ')' identifier_list :: '(' (identifier ws*)* ')'
match_pattern :: expression (ws* '|' ws* expression)* regex_pattern :: regex_pattern (ws* '|' ws* regex_pattern)*
match_pattern :: expression (ws* '|' ws* expression)*
regex_pattern :: '(?:' .* ')' { enclosed string must contain balanced parentheses }
command :: redirection command command :: redirection command
| list_expression command? | list_expression command?