mirror of
https://github.com/RGBCube/serenity
synced 2025-05-31 10:08:12 +00:00

The Regex object created a copy of the pattern string anyway, so tweak the constructor to allow callers to move() pattern strings into the regex. The Regex move constructor and assignment operator currently result in memory corruption. The Regex object stores a Matcher object, which holds a reference to the Regex object. So when the Regex object is moved, that reference is no longer valid. To fix this, the reference stored in the Matcher must be updated when the Regex is moved.
285 lines
10 KiB
C++
285 lines
10 KiB
C++
/*
|
|
* Copyright (c) 2020, Emanuel Sprung <emanuel.sprung@gmail.com>
|
|
*
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include "RegexByteCode.h"
|
|
#include "RegexMatch.h"
|
|
#include "RegexOptions.h"
|
|
#include "RegexParser.h"
|
|
|
|
#include <AK/Forward.h>
|
|
#include <AK/HashMap.h>
|
|
#include <AK/NonnullOwnPtrVector.h>
|
|
#include <AK/Types.h>
|
|
#include <AK/Utf32View.h>
|
|
#include <AK/Vector.h>
|
|
#include <ctype.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
namespace regex {
|
|
|
|
// Compile-time limits used by the regex engine.
// NOTE(review): presumably the recursion cap applies to Matcher::execute()'s recursion_level — confirm in the implementation file.
static constexpr size_t c_max_recursion = 5000;
// NOTE(review): presumably the number of match slots reserved up front — confirm in the implementation file.
static constexpr size_t c_match_preallocation_count = 0;
|
|
|
|
struct RegexResult final {
|
|
bool success { false };
|
|
size_t count { 0 };
|
|
Vector<Match> matches;
|
|
Vector<Vector<Match>> capture_group_matches;
|
|
Vector<HashMap<String, Match>> named_capture_group_matches;
|
|
size_t n_operations { 0 };
|
|
size_t n_capture_groups { 0 };
|
|
size_t n_named_capture_groups { 0 };
|
|
};
|
|
|
|
template<class Parser>
|
|
class Regex;
|
|
|
|
template<class Parser>
|
|
class Matcher final {
|
|
|
|
public:
|
|
Matcher(Regex<Parser> const* pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
: m_pattern(pattern)
|
|
, m_regex_options(regex_options.value_or({}))
|
|
{
|
|
}
|
|
~Matcher() = default;
|
|
|
|
RegexResult match(RegexStringView const&, Optional<typename ParserTraits<Parser>::OptionsType> = {}) const;
|
|
RegexResult match(Vector<RegexStringView> const, Optional<typename ParserTraits<Parser>::OptionsType> = {}) const;
|
|
|
|
typename ParserTraits<Parser>::OptionsType options() const
|
|
{
|
|
return m_regex_options;
|
|
}
|
|
|
|
void reset_pattern(Badge<Regex<Parser>>, Regex<Parser> const* pattern)
|
|
{
|
|
m_pattern = pattern;
|
|
}
|
|
|
|
private:
|
|
Optional<bool> execute(MatchInput const& input, MatchState& state, MatchOutput& output, size_t recursion_level) const;
|
|
ALWAYS_INLINE Optional<bool> execute_low_prio_forks(MatchInput const& input, MatchState& original_state, MatchOutput& output, Vector<MatchState> states, size_t recursion_level) const;
|
|
|
|
Regex<Parser> const* m_pattern;
|
|
typename ParserTraits<Parser>::OptionsType const m_regex_options;
|
|
};
|
|
|
|
template<class Parser>
|
|
class Regex final {
|
|
public:
|
|
String pattern_value;
|
|
regex::Parser::Result parser_result;
|
|
OwnPtr<Matcher<Parser>> matcher { nullptr };
|
|
mutable size_t start_offset { 0 };
|
|
|
|
explicit Regex(String pattern, typename ParserTraits<Parser>::OptionsType regex_options = {});
|
|
~Regex() = default;
|
|
Regex(Regex&&);
|
|
Regex& operator=(Regex&&);
|
|
|
|
typename ParserTraits<Parser>::OptionsType options() const;
|
|
void print_bytecode(FILE* f = stdout) const;
|
|
String error_string(Optional<String> message = {}) const;
|
|
|
|
RegexResult match(RegexStringView const view, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
if (!matcher || parser_result.error != Error::NoError)
|
|
return {};
|
|
return matcher->match(view, regex_options);
|
|
}
|
|
|
|
RegexResult match(Vector<RegexStringView> const views, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
if (!matcher || parser_result.error != Error::NoError)
|
|
return {};
|
|
return matcher->match(views, regex_options);
|
|
}
|
|
|
|
String replace(RegexStringView const view, StringView const& replacement_pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
if (!matcher || parser_result.error != Error::NoError)
|
|
return {};
|
|
|
|
StringBuilder builder;
|
|
size_t start_offset = 0;
|
|
RegexResult result = matcher->match(view, regex_options);
|
|
if (!result.success)
|
|
return view.to_string();
|
|
|
|
for (size_t i = 0; i < result.matches.size(); ++i) {
|
|
auto& match = result.matches[i];
|
|
builder.append(view.substring_view(start_offset, match.global_offset - start_offset).to_string());
|
|
start_offset = match.global_offset + match.view.length();
|
|
GenericLexer lexer(replacement_pattern);
|
|
while (!lexer.is_eof()) {
|
|
if (lexer.consume_specific('\\')) {
|
|
if (lexer.consume_specific('\\')) {
|
|
builder.append('\\');
|
|
continue;
|
|
}
|
|
auto number = lexer.consume_while(isdigit);
|
|
if (auto index = number.to_uint(); index.has_value() && result.n_capture_groups >= index.value()) {
|
|
builder.append(result.capture_group_matches[i][index.value() - 1].view.to_string());
|
|
} else {
|
|
builder.appendff("\\{}", number);
|
|
}
|
|
} else {
|
|
builder.append(lexer.consume_while([](auto ch) { return ch != '\\'; }));
|
|
}
|
|
}
|
|
}
|
|
|
|
builder.append(view.substring_view(start_offset, view.length() - start_offset).to_string());
|
|
|
|
return builder.to_string();
|
|
}
|
|
|
|
// FIXME: replace(Vector<RegexStringView> const , ...)
|
|
|
|
RegexResult search(RegexStringView const view, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
if (!matcher || parser_result.error != Error::NoError)
|
|
return {};
|
|
|
|
AllOptions options = (AllOptions)regex_options.value_or({});
|
|
if ((options & AllFlags::MatchNotBeginOfLine) && (options & AllFlags::MatchNotEndOfLine)) {
|
|
options.reset_flag(AllFlags::MatchNotEndOfLine);
|
|
options.reset_flag(AllFlags::MatchNotBeginOfLine);
|
|
}
|
|
options.reset_flag(AllFlags::Internal_Stateful);
|
|
options |= AllFlags::Global;
|
|
|
|
return matcher->match(view, options);
|
|
}
|
|
|
|
RegexResult search(Vector<RegexStringView> const views, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
if (!matcher || parser_result.error != Error::NoError)
|
|
return {};
|
|
|
|
AllOptions options = (AllOptions)regex_options.value_or({});
|
|
if ((options & AllFlags::MatchNotBeginOfLine) && (options & AllFlags::MatchNotEndOfLine)) {
|
|
options.reset_flag(AllFlags::MatchNotEndOfLine);
|
|
options.reset_flag(AllFlags::MatchNotBeginOfLine);
|
|
}
|
|
options.reset_flag(AllFlags::Internal_Stateful);
|
|
options |= AllFlags::Global;
|
|
|
|
return matcher->match(views, options);
|
|
}
|
|
|
|
bool match(RegexStringView const view, RegexResult& m, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
m = match(view, regex_options);
|
|
return m.success;
|
|
}
|
|
|
|
bool match(Vector<RegexStringView> const views, RegexResult& m, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
m = match(views, regex_options);
|
|
return m.success;
|
|
}
|
|
|
|
bool search(RegexStringView const view, RegexResult& m, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
m = search(view, regex_options);
|
|
return m.success;
|
|
}
|
|
|
|
bool search(Vector<RegexStringView> const views, RegexResult& m, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
m = search(views, regex_options);
|
|
return m.success;
|
|
}
|
|
|
|
bool has_match(RegexStringView const view, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
if (!matcher || parser_result.error != Error::NoError)
|
|
return false;
|
|
RegexResult result = matcher->match(view, AllOptions { regex_options.value_or({}) } | AllFlags::SkipSubExprResults);
|
|
return result.success;
|
|
}
|
|
|
|
bool has_match(Vector<RegexStringView> const views, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
|
|
{
|
|
if (!matcher || parser_result.error != Error::NoError)
|
|
return false;
|
|
RegexResult result = matcher->match(views, AllOptions { regex_options.value_or({}) } | AllFlags::SkipSubExprResults);
|
|
return result.success;
|
|
}
|
|
};
|
|
|
|
// free standing functions for match, search and has_match
|
|
template<class Parser>
|
|
RegexResult match(RegexStringView const view, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.match(view, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
RegexResult match(Vector<RegexStringView> const view, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.match(view, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
bool match(RegexStringView const view, Regex<Parser>& pattern, RegexResult&, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.match(view, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
bool match(Vector<RegexStringView> const view, Regex<Parser>& pattern, RegexResult&, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.match(view, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
RegexResult search(RegexStringView const view, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.search(view, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
RegexResult search(Vector<RegexStringView> const views, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.search(views, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
bool search(RegexStringView const view, Regex<Parser>& pattern, RegexResult&, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.search(view, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
bool search(Vector<RegexStringView> const views, Regex<Parser>& pattern, RegexResult&, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.search(views, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
bool has_match(RegexStringView const view, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.has_match(view, regex_options);
|
|
}
|
|
|
|
template<class Parser>
|
|
bool has_match(Vector<RegexStringView> const views, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
|
|
{
|
|
return pattern.has_match(views, regex_options);
|
|
}
|
|
}
|
|
|
|
// Make the most commonly used regex names available without the `regex::` qualifier.
using regex::has_match;
using regex::match;
using regex::Regex;
using regex::RegexResult;
|