1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-26 08:57:34 +00:00

Shell: Add support for regex match patterns

We previously allowed only globs as match patterns, but for more complex
matching needs, it's nice to have regular expressions.
And as the existing "name a part of the match" concept maps nicely to
named capture groups, we can simply reuse the same code and make groups
with names available in the match body.
This commit is contained in:
Ali Mohammad Pur 2022-04-15 01:50:36 +04:30 committed by Ali Mohammad Pur
parent 6aceec4535
commit 4ede121d31
6 changed files with 219 additions and 71 deletions

View file

@ -2117,8 +2117,15 @@ void MatchExpr::dump(int level) const
builder.append(')');
}
print_indented(builder.string_view(), level + 2);
for (auto& node : entry.options)
node.dump(level + 3);
entry.options.visit(
[&](NonnullRefPtrVector<Node> const& options) {
for (auto& option : options)
option.dump(level + 3);
},
[&](Vector<Regex<ECMA262>> const& options) {
for (auto& option : options)
print_indented(String::formatted("(regex: {})", option.pattern_value), level + 3);
});
print_indented("(execute)", level + 2);
if (entry.body)
entry.body->dump(level + 3);
@ -2136,6 +2143,21 @@ RefPtr<Value> MatchExpr::run(RefPtr<Shell> shell)
auto list = value->resolve_as_list(shell);
auto list_matches = [&](auto&& pattern, auto& spans) {
if constexpr (IsSame<RemoveCVReference<decltype(pattern)>, Regex<ECMA262>>) {
if (list.size() != 1)
return false;
auto& subject = list.first();
auto match = pattern.match(subject);
if (!match.success)
return false;
spans.ensure_capacity(match.n_capture_groups);
for (size_t i = 0; i < match.n_capture_groups; ++i) {
auto& capture = match.capture_group_matches[0][i];
spans.append(capture.view.to_string());
}
return true;
} else {
if (pattern.size() != list.size())
return false;
@ -2148,9 +2170,13 @@ RefPtr<Value> MatchExpr::run(RefPtr<Shell> shell)
}
return true;
}
};
auto resolve_pattern = [&](auto& option) {
auto resolve_pattern = [&](auto& option) -> decltype(auto) {
if constexpr (IsSame<RemoveCVReference<decltype(option)>, Regex<ECMA262>>) {
return option;
} else {
Vector<String> pattern;
if (option.is_glob()) {
pattern.append(static_cast<const Glob*>(&option)->text());
@ -2169,6 +2195,7 @@ RefPtr<Value> MatchExpr::run(RefPtr<Shell> shell)
}
return pattern;
}
};
auto frame = shell->push_frame(String::formatted("match ({})", this));
@ -2176,7 +2203,8 @@ RefPtr<Value> MatchExpr::run(RefPtr<Shell> shell)
shell->set_local_variable(m_expr_name, value, true);
for (auto& entry : m_entries) {
for (auto& option : entry.options) {
auto result = entry.options.visit([&](auto& options) -> Variant<IterationDecision, RefPtr<Value>> {
for (auto& option : options) {
Vector<String> spans;
if (list_matches(resolve_pattern(option), spans)) {
if (entry.body) {
@ -2189,11 +2217,17 @@ RefPtr<Value> MatchExpr::run(RefPtr<Shell> shell)
}
}
return entry.body->run(shell);
} else {
return make_ref_counted<AST::ListValue>({});
}
}
return RefPtr<Value>(make_ref_counted<AST::ListValue>({}));
}
}
return IterationDecision::Continue;
});
if (result.has<IterationDecision>() && result.get<IterationDecision>() == IterationDecision::Break)
break;
if (result.has<RefPtr<Value>>())
return move(result).get<RefPtr<Value>>();
}
shell->raise_error(Shell::ShellError::EvaluatedSyntaxError, "Non-exhaustive match rules!", position());
@ -2211,8 +2245,12 @@ void MatchExpr::highlight_in_editor(Line::Editor& editor, Shell& shell, Highligh
for (auto& entry : m_entries) {
metadata.is_first_in_list = false;
for (auto& option : entry.options)
entry.options.visit(
[&](NonnullRefPtrVector<Node>& node_options) {
for (auto& option : node_options)
option.highlight_in_editor(editor, shell, metadata);
},
[](auto&) {});
metadata.is_first_in_list = true;
if (entry.body)

View file

@ -17,6 +17,7 @@
#include <AK/Types.h>
#include <AK/Vector.h>
#include <LibLine/Editor.h>
#include <LibRegex/Regex.h>
namespace Shell::AST {
@ -1051,7 +1052,7 @@ private:
};
struct MatchEntry {
NonnullRefPtrVector<Node> options;
Variant<NonnullRefPtrVector<Node>, Vector<Regex<ECMA262>>> options;
Optional<Vector<String>> match_names;
Optional<Position> match_as_position;
Vector<Position> pipe_positions;

View file

@ -583,12 +583,24 @@ void Formatter::visit(const AST::MatchExpr* node)
insert_separator();
first_entry = false;
auto first = true;
for (auto& option : entry.options) {
entry.options.visit(
[&](NonnullRefPtrVector<AST::Node> const& patterns) {
for (auto& option : patterns) {
if (!first)
current_builder().append(" | ");
first = false;
option.visit(*this);
}
},
[&](Vector<Regex<ECMA262>> const& patterns) {
for (auto& option : patterns) {
if (!first)
current_builder().append(" | ");
first = false;
auto node = make_ref_counted<AST::BarewordLiteral>(AST::Position {}, option.pattern_value);
node->visit(*this);
}
});
current_builder().append(' ');
if (entry.match_names.has_value() && !entry.match_names.value().is_empty()) {

View file

@ -141,8 +141,10 @@ void NodeVisitor::visit(const AST::MatchExpr* node)
{
node->matched_expr()->visit(*this);
for (auto& entry : node->entries()) {
for (auto& option : entry.options)
if (auto* ptr = entry.options.get_pointer<NonnullRefPtrVector<Node>>()) {
for (auto& option : *ptr)
option.visit(*this);
}
if (entry.body)
entry.body->visit(*this);
}

View file

@ -84,9 +84,9 @@ bool Parser::expect(StringView expected)
}
// Allocates an AST node of type A and wraps it in a NonnullRefPtr via adopt_ref().
// The node's first constructor argument is an AST::Position assembled from the
// last entries of m_rule_start_offsets / m_rule_start_lines together with the
// current m_offset and line(); the caller-supplied arguments follow it.
template<typename A, typename... Args>
NonnullRefPtr<A> Parser::create(Args... args)
NonnullRefPtr<A> Parser::create(Args&&... args)
{
return adopt_ref(*new A(AST::Position { m_rule_start_offsets.last(), m_offset, m_rule_start_lines.last(), line() }, args...));
return adopt_ref(*new A(AST::Position { m_rule_start_offsets.last(), m_offset, m_rule_start_lines.last(), line() }, forward<Args>(args)...));
}
[[nodiscard]] OwnPtr<Parser::ScopedOffset> Parser::push_start()
@ -892,10 +892,10 @@ RefPtr<AST::Node> Parser::parse_match_expr()
for (;;) {
auto entry = parse_match_entry();
consume_while(is_any_of(" \t\n"));
if (entry.options.is_empty())
if (entry.options.visit([](auto& x) { return x.is_empty(); }))
break;
entries.append(entry);
entries.append(move(entry));
}
consume_while(is_any_of(" \t\n"));
@ -916,15 +916,32 @@ AST::MatchEntry Parser::parse_match_entry()
auto rule_start = push_start();
NonnullRefPtrVector<AST::Node> patterns;
Vector<Regex<ECMA262>> regexps;
Vector<AST::Position> pipe_positions;
Optional<Vector<String>> match_names;
Optional<AST::Position> match_as_position;
enum {
Regex,
Glob,
} pattern_kind;
auto pattern = parse_match_pattern();
if (!pattern)
return { {}, {}, {}, {}, create<AST::SyntaxError>("Expected a pattern in 'match' body", true) };
consume_while(is_any_of(" \t\n"));
patterns.append(pattern.release_nonnull());
auto regex_pattern = parse_regex_pattern();
if (regex_pattern.has_value()) {
if (auto error = regex_pattern.value().parser_result.error; error != regex::Error::NoError)
return { NonnullRefPtrVector<AST::Node> {}, {}, {}, {}, create<AST::SyntaxError>(regex::get_error_string(error), false) };
pattern_kind = Regex;
regexps.append(regex_pattern.release_value());
} else {
auto glob_pattern = parse_match_pattern();
if (!glob_pattern)
return { NonnullRefPtrVector<AST::Node> {}, {}, {}, {}, create<AST::SyntaxError>("Expected a pattern in 'match' body", true) };
pattern_kind = Glob;
patterns.append(glob_pattern.release_nonnull());
}
consume_while(is_any_of(" \t\n"));
@ -934,14 +951,28 @@ AST::MatchEntry Parser::parse_match_entry()
while (expect('|')) {
pipe_positions.append({ previous_pipe_start_position, m_offset, previous_pipe_start_line, line() });
consume_while(is_any_of(" \t\n"));
switch (pattern_kind) {
case Regex: {
auto pattern = parse_regex_pattern();
if (!pattern.has_value()) {
error = create<AST::SyntaxError>("Expected a regex pattern to follow '|' in 'match' body", true);
break;
}
regexps.append(pattern.release_value());
break;
}
case Glob: {
auto pattern = parse_match_pattern();
if (!pattern) {
error = create<AST::SyntaxError>("Expected a pattern to follow '|' in 'match' body", true);
break;
}
consume_while(is_any_of(" \t\n"));
patterns.append(pattern.release_nonnull());
break;
}
}
consume_while(is_any_of(" \t\n"));
previous_pipe_start_line = line();
previous_pipe_start_position = m_offset;
@ -951,7 +982,7 @@ AST::MatchEntry Parser::parse_match_entry()
auto as_start_position = m_offset;
auto as_start_line = line();
if (expect("as")) {
if (pattern_kind == Glob && expect("as")) {
match_as_position = AST::Position { as_start_position, m_offset, as_start_line, line() };
consume_while(is_any_of(" \t\n"));
if (!expect('(')) {
@ -975,6 +1006,31 @@ AST::MatchEntry Parser::parse_match_entry()
consume_while(is_any_of(" \t\n"));
}
if (pattern_kind == Regex) {
Vector<String> names;
for (auto& regex : regexps) {
if (names.is_empty()) {
for (auto& name : regex.parser_result.capture_groups)
names.append(name);
} else {
size_t index = 0;
for (auto& name : regex.parser_result.capture_groups) {
if (names.size() <= index) {
names.append(name);
continue;
}
if (names[index] != name) {
if (!error)
error = create<AST::SyntaxError>("Alternative regex patterns must have the same capture groups", false);
break;
}
}
}
}
match_names = move(names);
}
if (!expect('{')) {
if (!error)
error = create<AST::SyntaxError>("Expected an open brace '{' to start a match entry body", true);
@ -992,7 +1048,10 @@ AST::MatchEntry Parser::parse_match_entry()
else if (error)
body = error;
if (pattern_kind == Glob)
return { move(patterns), move(match_names), move(match_as_position), move(pipe_positions), move(body) };
return { move(regexps), move(match_names), move(match_as_position), move(pipe_positions), move(body) };
}
RefPtr<AST::Node> Parser::parse_match_pattern()
@ -1000,6 +1059,36 @@ RefPtr<AST::Node> Parser::parse_match_pattern()
return parse_expression();
}
// Tries to read a regex match pattern starting at the current offset.
//
// A regex pattern must open with `(?:` or `(?<`; everything up to the
// parenthesis that closes that opening group is taken verbatim as the pattern
// text and handed to Regex<ECMA262>.
//
// Returns an empty Optional when the input does not start with one of the two
// openers, or when the input ends before the opening group is closed (in the
// latter case the parser is rewound with restore_to()).
// The pattern is not validated here: the returned Regex may still carry a
// parse error in its parser_result, which the caller is expected to inspect.
//
// NOTE: parentheses inside a character class (e.g. `[(]`) are still counted as
// grouping parentheses by this scan.
Optional<Regex<ECMA262>> Parser::parse_regex_pattern()
{
    auto rule_start = push_start();
    auto start = m_offset;
    if (!expect("(?:") && !expect("(?<"))
        return {};

    // One group is already open: the `(` of the opener consumed above.
    size_t open_parens = 1;
    while (open_parens > 0 && !at_end()) {
        if (next_is("\\")) {
            // A backslash escapes the following character, so `\(` and `\)`
            // are literals and must not affect the nesting depth.
            consume();
            if (at_end())
                break;
            consume();
            continue;
        }

        if (next_is("("))
            ++open_parens;
        else if (next_is(")"))
            --open_parens;
        consume();
    }

    // Ran out of input before the opening group was closed: not a regex pattern.
    if (open_parens != 0) {
        restore_to(*rule_start);
        return {};
    }

    auto pattern = m_input.substring_view(start, m_offset - start);
    return Regex<ECMA262>(pattern);
}
RefPtr<AST::Node> Parser::parse_redirection()
{
auto rule_start = push_start();

View file

@ -25,7 +25,7 @@ public:
RefPtr<AST::Node> parse();
/// Parse the given string *as* an expression
/// that is to forefully enclose it in double-quotes.
/// that is to forcefully enclose it in double-quotes.
RefPtr<AST::Node> parse_as_single_expression();
NonnullRefPtrVector<AST::Node> parse_as_multiple_expressions();
@ -77,6 +77,7 @@ private:
RefPtr<AST::Node> parse_match_expr();
AST::MatchEntry parse_match_entry();
RefPtr<AST::Node> parse_match_pattern();
Optional<Regex<ECMA262>> parse_regex_pattern();
RefPtr<AST::Node> parse_redirection();
RefPtr<AST::Node> parse_list_expression();
RefPtr<AST::Node> parse_expression();
@ -98,7 +99,7 @@ private:
bool parse_heredoc_entries();
template<typename A, typename... Args>
NonnullRefPtr<A> create(Args... args);
NonnullRefPtr<A> create(Args&&... args);
void set_end_condition(OwnPtr<Function<bool()>> condition) { m_end_condition = move(condition); }
bool at_end() const
@ -228,11 +229,16 @@ subshell :: '{' toplevel '}'
match_expr :: 'match' ws+ expression ws* ('as' ws+ identifier)? '{' match_entry* '}'
match_entry :: match_pattern ws* (as identifier_list)? '{' toplevel '}'
| regex_patterns ws* '{' toplevel '}'
identifier_list :: '(' (identifier ws*)* ')'
regex_patterns :: regex_pattern (ws* '|' ws* regex_pattern)*
match_pattern :: expression (ws* '|' ws* expression)*
regex_pattern :: ('(?:' | '(?<') .* ')' { enclosed string must contain balanced parentheses }
command :: redirection command
| list_expression command?