mirror of
https://github.com/RGBCube/serenity
synced 2025-05-31 08:08:12 +00:00

Combining these into one list helps reduce the size of MatchState, and as a result, reduces the amount of memory consumed during execution of very large regex matches. Doing this also allows us to remove a few regex byte code instructions: ClearNamedCaptureGroup, SaveLeftNamedCaptureGroup, and NamedReference. Named groups now behave the same as unnamed groups for these operations. Note that SaveRightNamedCaptureGroup still exists to cache the matched group name. This also removes the recursion level from the MatchState, as it can exist as a local variable in Matcher::execute instead.
286 lines, 10 KiB, C++
/*
|
|
* Copyright (c) 2020, Emanuel Sprung <emanuel.sprung@gmail.com>
|
|
*
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include "RegexByteCode.h"
|
|
#include "RegexMatch.h"
|
|
#include "RegexOptions.h"
|
|
#include "RegexParser.h"
|
|
|
|
#include <AK/Forward.h>
|
|
#include <AK/HashMap.h>
|
|
#include <AK/NonnullOwnPtrVector.h>
|
|
#include <AK/Types.h>
|
|
#include <AK/Utf32View.h>
|
|
#include <AK/Vector.h>
|
|
#include <ctype.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
namespace regex {
|
|
|
|
// Limit named for the maximum recursion depth. It is not referenced anywhere
// in this header. NOTE(review): presumably enforced by the matcher
// implementation - confirm.
static constexpr size_t c_max_recursion = 5000;
// Named as the number of matches to preallocate; currently zero. Not
// referenced in this header. NOTE(review): confirm where it is consumed.
static constexpr size_t c_match_preallocation_count = 0;
|
|
|
|
struct RegexResult final {
|
|
bool success { false };
|
|
size_t count { 0 };
|
|
Vector<Match> matches;
|
|
Vector<Vector<Match>> capture_group_matches;
|
|
size_t n_operations { 0 };
|
|
size_t n_capture_groups { 0 };
|
|
size_t n_named_capture_groups { 0 };
|
|
};
|
|
|
|
template<class Parser>
|
|
class Regex;
|
|
|
|
template<class Parser>
|
|
class Matcher final {
|
|
|
|
public:
|
|
Matcher(Regex<Parser> const* pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
: m_pattern(pattern)
|
|
, m_regex_options(regex_options.value_or({}))
|
|
{
|
|
}
|
|
~Matcher() = default;
|
|
|
|
RegexResult match(RegexStringView const&, Optional<typename ParserTraits<Parser>::OptionsType> = {}) const;
|
|
RegexResult match(Vector<RegexStringView> const&, Optional<typename ParserTraits<Parser>::OptionsType> = {}) const;
|
|
|
|
typename ParserTraits<Parser>::OptionsType options() const
|
|
{
|
|
return m_regex_options;
|
|
}
|
|
|
|
void reset_pattern(Badge<Regex<Parser>>, Regex<Parser> const* pattern)
|
|
{
|
|
m_pattern = pattern;
|
|
}
|
|
|
|
private:
|
|
Optional<bool> execute(MatchInput const& input, MatchState& state, MatchOutput& output) const;
|
|
|
|
Regex<Parser> const* m_pattern;
|
|
typename ParserTraits<Parser>::OptionsType const m_regex_options;
|
|
};
|
|
|
|
template<class Parser>
|
|
class Regex final {
|
|
public:
|
|
String pattern_value;
|
|
regex::Parser::Result parser_result;
|
|
OwnPtr<Matcher<Parser>> matcher { nullptr };
|
|
mutable size_t start_offset { 0 };
|
|
|
|
static regex::Parser::Result parse_pattern(StringView pattern, typename ParserTraits<Parser>::OptionsType regex_options = {});
|
|
|
|
explicit Regex(String pattern, typename ParserTraits<Parser>::OptionsType regex_options = {});
|
|
Regex(regex::Parser::Result parse_result, String pattern, typename ParserTraits<Parser>::OptionsType regex_options = {});
|
|
~Regex() = default;
|
|
Regex(Regex&&);
|
|
Regex& operator=(Regex&&);
|
|
|
|
typename ParserTraits<Parser>::OptionsType options() const;
|
|
void print_bytecode(FILE* f = stdout) const;
|
|
String error_string(Optional<String> message = {}) const;
|
|
|
|
RegexResult match(RegexStringView const view, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
if (!matcher || parser_result.error != Error::NoError)
|
|
return {};
|
|
return matcher->match(view, regex_options);
|
|
}
|
|
|
|
RegexResult match(Vector<RegexStringView> const views, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
if (!matcher || parser_result.error != Error::NoError)
|
|
return {};
|
|
return matcher->match(views, regex_options);
|
|
}
|
|
|
|
String replace(RegexStringView const view, StringView const& replacement_pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
if (!matcher || parser_result.error != Error::NoError)
|
|
return {};
|
|
|
|
StringBuilder builder;
|
|
size_t start_offset = 0;
|
|
RegexResult result = matcher->match(view, regex_options);
|
|
if (!result.success)
|
|
return view.to_string();
|
|
|
|
for (size_t i = 0; i < result.matches.size(); ++i) {
|
|
auto& match = result.matches[i];
|
|
builder.append(view.substring_view(start_offset, match.global_offset - start_offset).to_string());
|
|
start_offset = match.global_offset + match.view.length();
|
|
GenericLexer lexer(replacement_pattern);
|
|
while (!lexer.is_eof()) {
|
|
if (lexer.consume_specific('\\')) {
|
|
if (lexer.consume_specific('\\')) {
|
|
builder.append('\\');
|
|
continue;
|
|
}
|
|
auto number = lexer.consume_while(isdigit);
|
|
if (auto index = number.to_uint(); index.has_value() && result.n_capture_groups >= index.value()) {
|
|
builder.append(result.capture_group_matches[i][index.value() - 1].view.to_string());
|
|
} else {
|
|
builder.appendff("\\{}", number);
|
|
}
|
|
} else {
|
|
builder.append(lexer.consume_while([](auto ch) { return ch != '\\'; }));
|
|
}
|
|
}
|
|
}
|
|
|
|
builder.append(view.substring_view(start_offset, view.length() - start_offset).to_string());
|
|
|
|
return builder.to_string();
|
|
}
|
|
|
|
// FIXME: replace(Vector<RegexStringView> const , ...)
|
|
|
|
RegexResult search(RegexStringView const view, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
if (!matcher || parser_result.error != Error::NoError)
|
|
return {};
|
|
|
|
AllOptions options = (AllOptions)regex_options.value_or({});
|
|
if ((options & AllFlags::MatchNotBeginOfLine) && (options & AllFlags::MatchNotEndOfLine)) {
|
|
options.reset_flag(AllFlags::MatchNotEndOfLine);
|
|
options.reset_flag(AllFlags::MatchNotBeginOfLine);
|
|
}
|
|
options.reset_flag(AllFlags::Internal_Stateful);
|
|
options |= AllFlags::Global;
|
|
|
|
return matcher->match(view, options);
|
|
}
|
|
|
|
RegexResult search(Vector<RegexStringView> const views, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
if (!matcher || parser_result.error != Error::NoError)
|
|
return {};
|
|
|
|
AllOptions options = (AllOptions)regex_options.value_or({});
|
|
if ((options & AllFlags::MatchNotBeginOfLine) && (options & AllFlags::MatchNotEndOfLine)) {
|
|
options.reset_flag(AllFlags::MatchNotEndOfLine);
|
|
options.reset_flag(AllFlags::MatchNotBeginOfLine);
|
|
}
|
|
options.reset_flag(AllFlags::Internal_Stateful);
|
|
options |= AllFlags::Global;
|
|
|
|
return matcher->match(views, options);
|
|
}
|
|
|
|
bool match(RegexStringView const view, RegexResult& m, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
m = match(view, regex_options);
|
|
return m.success;
|
|
}
|
|
|
|
bool match(Vector<RegexStringView> const views, RegexResult& m, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
m = match(views, regex_options);
|
|
return m.success;
|
|
}
|
|
|
|
bool search(RegexStringView const view, RegexResult& m, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
m = search(view, regex_options);
|
|
return m.success;
|
|
}
|
|
|
|
bool search(Vector<RegexStringView> const views, RegexResult& m, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
m = search(views, regex_options);
|
|
return m.success;
|
|
}
|
|
|
|
bool has_match(RegexStringView const view, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
if (!matcher || parser_result.error != Error::NoError)
|
|
return false;
|
|
RegexResult result = matcher->match(view, AllOptions { regex_options.value_or({}) } | AllFlags::SkipSubExprResults);
|
|
return result.success;
|
|
}
|
|
|
|
bool has_match(Vector<RegexStringView> const views, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
if (!matcher || parser_result.error != Error::NoError)
|
|
return false;
|
|
RegexResult result = matcher->match(views, AllOptions { regex_options.value_or({}) } | AllFlags::SkipSubExprResults);
|
|
return result.success;
|
|
}
|
|
};
|
|
|
|
// free standing functions for match, search and has_match
|
|
template<class Parser>
|
|
RegexResult match(RegexStringView const view, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.match(view, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
RegexResult match(Vector<RegexStringView> const view, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.match(view, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
bool match(RegexStringView const view, Regex<Parser>& pattern, RegexResult&, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.match(view, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
bool match(Vector<RegexStringView> const view, Regex<Parser>& pattern, RegexResult&, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.match(view, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
RegexResult search(RegexStringView const view, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.search(view, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
RegexResult search(Vector<RegexStringView> const views, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.search(views, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
bool search(RegexStringView const view, Regex<Parser>& pattern, RegexResult&, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.search(view, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
bool search(Vector<RegexStringView> const views, Regex<Parser>& pattern, RegexResult&, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.search(views, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
bool has_match(RegexStringView const view, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.has_match(view, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
bool has_match(Vector<RegexStringView> const views, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.has_match(views, regex_options);
|
|
}
|
|
}
|
|
|
|
using regex::has_match;
|
|
using regex::match;
|
|
using regex::Regex;
|
|
using regex::RegexResult;
|