1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-25 23:37:35 +00:00

Shell: Add support for regex match patterns

We previously allowed globs as match pattern, but for more complex
matching needs, it's nice to have regular expressions.
And as the existing "name a part of the match" concept maps nicely to
named capture groups, we can simply reuse the same code and make groups
with names available in the match body.
This commit is contained in:
Ali Mohammad Pur 2022-04-15 01:50:36 +04:30 committed by Ali Mohammad Pur
parent 6aceec4535
commit 4ede121d31
6 changed files with 219 additions and 71 deletions

View file

@ -84,9 +84,9 @@ bool Parser::expect(StringView expected)
}
template<typename A, typename... Args>
NonnullRefPtr<A> Parser::create(Args... args)
NonnullRefPtr<A> Parser::create(Args&&... args)
{
return adopt_ref(*new A(AST::Position { m_rule_start_offsets.last(), m_offset, m_rule_start_lines.last(), line() }, args...));
return adopt_ref(*new A(AST::Position { m_rule_start_offsets.last(), m_offset, m_rule_start_lines.last(), line() }, forward<Args>(args)...));
}
[[nodiscard]] OwnPtr<Parser::ScopedOffset> Parser::push_start()
@ -892,10 +892,10 @@ RefPtr<AST::Node> Parser::parse_match_expr()
for (;;) {
auto entry = parse_match_entry();
consume_while(is_any_of(" \t\n"));
if (entry.options.is_empty())
if (entry.options.visit([](auto& x) { return x.is_empty(); }))
break;
entries.append(entry);
entries.append(move(entry));
}
consume_while(is_any_of(" \t\n"));
@ -916,15 +916,32 @@ AST::MatchEntry Parser::parse_match_entry()
auto rule_start = push_start();
NonnullRefPtrVector<AST::Node> patterns;
Vector<Regex<ECMA262>> regexps;
Vector<AST::Position> pipe_positions;
Optional<Vector<String>> match_names;
Optional<AST::Position> match_as_position;
enum {
Regex,
Glob,
} pattern_kind;
auto pattern = parse_match_pattern();
if (!pattern)
return { {}, {}, {}, {}, create<AST::SyntaxError>("Expected a pattern in 'match' body", true) };
consume_while(is_any_of(" \t\n"));
patterns.append(pattern.release_nonnull());
auto regex_pattern = parse_regex_pattern();
if (regex_pattern.has_value()) {
if (auto error = regex_pattern.value().parser_result.error; error != regex::Error::NoError)
return { NonnullRefPtrVector<AST::Node> {}, {}, {}, {}, create<AST::SyntaxError>(regex::get_error_string(error), false) };
pattern_kind = Regex;
regexps.append(regex_pattern.release_value());
} else {
auto glob_pattern = parse_match_pattern();
if (!glob_pattern)
return { NonnullRefPtrVector<AST::Node> {}, {}, {}, {}, create<AST::SyntaxError>("Expected a pattern in 'match' body", true) };
pattern_kind = Glob;
patterns.append(glob_pattern.release_nonnull());
}
consume_while(is_any_of(" \t\n"));
@ -934,14 +951,28 @@ AST::MatchEntry Parser::parse_match_entry()
while (expect('|')) {
pipe_positions.append({ previous_pipe_start_position, m_offset, previous_pipe_start_line, line() });
consume_while(is_any_of(" \t\n"));
auto pattern = parse_match_pattern();
if (!pattern) {
error = create<AST::SyntaxError>("Expected a pattern to follow '|' in 'match' body", true);
switch (pattern_kind) {
case Regex: {
auto pattern = parse_regex_pattern();
if (!pattern.has_value()) {
error = create<AST::SyntaxError>("Expected a regex pattern to follow '|' in 'match' body", true);
break;
}
regexps.append(pattern.release_value());
break;
}
consume_while(is_any_of(" \t\n"));
case Glob: {
auto pattern = parse_match_pattern();
if (!pattern) {
error = create<AST::SyntaxError>("Expected a pattern to follow '|' in 'match' body", true);
break;
}
patterns.append(pattern.release_nonnull());
break;
}
}
patterns.append(pattern.release_nonnull());
consume_while(is_any_of(" \t\n"));
previous_pipe_start_line = line();
previous_pipe_start_position = m_offset;
@ -951,7 +982,7 @@ AST::MatchEntry Parser::parse_match_entry()
auto as_start_position = m_offset;
auto as_start_line = line();
if (expect("as")) {
if (pattern_kind == Glob && expect("as")) {
match_as_position = AST::Position { as_start_position, m_offset, as_start_line, line() };
consume_while(is_any_of(" \t\n"));
if (!expect('(')) {
@ -975,6 +1006,31 @@ AST::MatchEntry Parser::parse_match_entry()
consume_while(is_any_of(" \t\n"));
}
if (pattern_kind == Regex) {
Vector<String> names;
for (auto& regex : regexps) {
if (names.is_empty()) {
for (auto& name : regex.parser_result.capture_groups)
names.append(name);
} else {
size_t index = 0;
for (auto& name : regex.parser_result.capture_groups) {
if (names.size() <= index) {
names.append(name);
continue;
}
if (names[index] != name) {
if (!error)
error = create<AST::SyntaxError>("Alternative regex patterns must have the same capture groups", false);
break;
}
}
}
}
match_names = move(names);
}
if (!expect('{')) {
if (!error)
error = create<AST::SyntaxError>("Expected an open brace '{' to start a match entry body", true);
@ -992,7 +1048,10 @@ AST::MatchEntry Parser::parse_match_entry()
else if (error)
body = error;
return { move(patterns), move(match_names), move(match_as_position), move(pipe_positions), move(body) };
if (pattern_kind == Glob)
return { move(patterns), move(match_names), move(match_as_position), move(pipe_positions), move(body) };
return { move(regexps), move(match_names), move(match_as_position), move(pipe_positions), move(body) };
}
RefPtr<AST::Node> Parser::parse_match_pattern()
@ -1000,6 +1059,36 @@ RefPtr<AST::Node> Parser::parse_match_pattern()
return parse_expression();
}
Optional<Regex<ECMA262>> Parser::parse_regex_pattern()
{
auto rule_start = push_start();
auto start = m_offset;
if (!expect("(?:") && !expect("(?<"))
return {};
size_t open_parens = 1;
while (open_parens > 0) {
if (at_end())
break;
if (next_is("("))
++open_parens;
else if (next_is(")"))
--open_parens;
consume();
}
if (open_parens != 0) {
restore_to(*rule_start);
return {};
}
auto end = m_offset;
auto pattern = m_input.substring_view(start, end - start);
return Regex<ECMA262>(pattern);
}
RefPtr<AST::Node> Parser::parse_redirection()
{
auto rule_start = push_start();