1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-31 10:08:12 +00:00
serenity/Userland/Libraries/LibRegex/RegexMatcher.h
Timothy Flynn b162517065 LibRegex: Take ownership of pattern string and fix move operations
The Regex object created a copy of the pattern string anyways, so tweak
the constructor to allow callers to move() pattern strings into the
regex.

The Regex move constructor and assignment operator currently result in
memory corruption. The Regex object stores a Matcher object, which holds
a reference to the Regex object. So when the Regex object is moved, that
reference is no longer valid. To fix this, the reference stored in the
Matcher must be updated when the Regex is moved.
2021-07-30 21:26:31 +01:00

285 lines
10 KiB
C++

/*
* Copyright (c) 2020, Emanuel Sprung <emanuel.sprung@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include "RegexByteCode.h"
#include "RegexMatch.h"
#include "RegexOptions.h"
#include "RegexParser.h"
#include <AK/Forward.h>
#include <AK/HashMap.h>
#include <AK/NonnullOwnPtrVector.h>
#include <AK/Types.h>
#include <AK/Utf32View.h>
#include <AK/Vector.h>
#include <ctype.h>
#include <stdio.h>
namespace regex {
// Tuning constants for the matcher.
// NOTE(review): neither constant is used in this header; presumably c_max_recursion bounds the
// `recursion_level` passed to Matcher::execute() — confirm in the matcher implementation.
static constexpr size_t c_max_recursion { 5000 };
// NOTE(review): presumably the capacity reserved up front for match vectors — confirm.
static constexpr size_t c_match_preallocation_count { 0 };
// Result of a matching run, as returned by Matcher::match() and the Regex entry points.
struct RegexResult final {
// True when matching succeeded; this is the value the bool-returning Regex overloads report.
bool success { false };
// NOTE(review): presumably the number of matches found — confirm in the matcher implementation.
size_t count { 0 };
// One entry per match; Regex::replace() reads `global_offset` and `view` from each entry.
Vector<Match> matches;
// Indexed as [match index][capture group index, 0-based], which is how Regex::replace() reads it.
Vector<Vector<Match>> capture_group_matches;
// NOTE(review): presumably one name -> Match map per match, parallel to `matches` — confirm.
Vector<HashMap<String, Match>> named_capture_group_matches;
// NOTE(review): presumably the number of operations executed while matching — confirm.
size_t n_operations { 0 };
// Number of capture groups; Regex::replace() uses it as the upper bound for \N references.
size_t n_capture_groups { 0 };
// NOTE(review): presumably the number of named capture groups — confirm.
size_t n_named_capture_groups { 0 };
};
// Forward declaration: Matcher (below) stores a pointer to Regex<Parser>, which is only defined
// after Matcher because Regex in turn owns a Matcher.
template<class Parser>
class Regex;
// Runs matches for one Regex<Parser>.
//
// Holds a raw, non-owning pointer back to the Regex it belongs to, plus the options it was
// constructed with. Only the trivial accessors are defined here; match() and execute*() are
// defined out of line.
template<class Parser>
class Matcher final {
public:
// Binds the matcher to `pattern`. When `regex_options` is empty, a default-constructed
// OptionsType is stored instead.
Matcher(Regex<Parser> const* pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
: m_pattern(pattern)
, m_regex_options(regex_options.value_or({}))
{
}
~Matcher() = default;
// Match against a single view, or against several views.
// NOTE(review): the optional argument presumably supplies per-call options in addition to the
// ones stored at construction — confirm in the out-of-line definition.
RegexResult match(RegexStringView const&, Optional<typename ParserTraits<Parser>::OptionsType> = {}) const;
RegexResult match(Vector<RegexStringView> const, Optional<typename ParserTraits<Parser>::OptionsType> = {}) const;
// Returns the options this matcher was constructed with.
typename ParserTraits<Parser>::OptionsType options() const
{
return m_regex_options;
}
// Re-points this matcher at `pattern`. Needed when the owning Regex is moved, because the stored
// pointer would otherwise keep referring to the moved-from object.
// NOTE(review): the Badge parameter is presumably AK's idiom for restricting callers to
// Regex<Parser> — confirm against AK/Badge.h.
void reset_pattern(Badge<Regex<Parser>>, Regex<Parser> const* pattern)
{
m_pattern = pattern;
}
private:
// Internal matching routines, defined out of line.
// NOTE(review): presumably an empty Optional signals "no decision"/abort rather than a plain
// non-match — confirm in the definition.
Optional<bool> execute(MatchInput const& input, MatchState& state, MatchOutput& output, size_t recursion_level) const;
ALWAYS_INLINE Optional<bool> execute_low_prio_forks(MatchInput const& input, MatchState& original_state, MatchOutput& output, Vector<MatchState> states, size_t recursion_level) const;
// Non-owning pointer to the Regex this matcher serves; updated through reset_pattern().
Regex<Parser> const* m_pattern;
// Options fixed at construction time.
typename ParserTraits<Parser>::OptionsType const m_regex_options;
};
template<class Parser>
class Regex final {
public:
String pattern_value;
regex::Parser::Result parser_result;
OwnPtr<Matcher<Parser>> matcher { nullptr };
mutable size_t start_offset { 0 };
explicit Regex(String pattern, typename ParserTraits<Parser>::OptionsType regex_options = {});
~Regex() = default;
Regex(Regex&&);
Regex& operator=(Regex&&);
typename ParserTraits<Parser>::OptionsType options() const;
void print_bytecode(FILE* f = stdout) const;
String error_string(Optional<String> message = {}) const;
RegexResult match(RegexStringView const view, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
{
if (!matcher || parser_result.error != Error::NoError)
return {};
return matcher->match(view, regex_options);
}
RegexResult match(Vector<RegexStringView> const views, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
{
if (!matcher || parser_result.error != Error::NoError)
return {};
return matcher->match(views, regex_options);
}
String replace(RegexStringView const view, StringView const& replacement_pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
{
if (!matcher || parser_result.error != Error::NoError)
return {};
StringBuilder builder;
size_t start_offset = 0;
RegexResult result = matcher->match(view, regex_options);
if (!result.success)
return view.to_string();
for (size_t i = 0; i < result.matches.size(); ++i) {
auto& match = result.matches[i];
builder.append(view.substring_view(start_offset, match.global_offset - start_offset).to_string());
start_offset = match.global_offset + match.view.length();
GenericLexer lexer(replacement_pattern);
while (!lexer.is_eof()) {
if (lexer.consume_specific('\\')) {
if (lexer.consume_specific('\\')) {
builder.append('\\');
continue;
}
auto number = lexer.consume_while(isdigit);
if (auto index = number.to_uint(); index.has_value() && result.n_capture_groups >= index.value()) {
builder.append(result.capture_group_matches[i][index.value() - 1].view.to_string());
} else {
builder.appendff("\\{}", number);
}
} else {
builder.append(lexer.consume_while([](auto ch) { return ch != '\\'; }));
}
}
}
builder.append(view.substring_view(start_offset, view.length() - start_offset).to_string());
return builder.to_string();
}
// FIXME: replace(Vector<RegexStringView> const , ...)
RegexResult search(RegexStringView const view, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
{
if (!matcher || parser_result.error != Error::NoError)
return {};
AllOptions options = (AllOptions)regex_options.value_or({});
if ((options & AllFlags::MatchNotBeginOfLine) && (options & AllFlags::MatchNotEndOfLine)) {
options.reset_flag(AllFlags::MatchNotEndOfLine);
options.reset_flag(AllFlags::MatchNotBeginOfLine);
}
options.reset_flag(AllFlags::Internal_Stateful);
options |= AllFlags::Global;
return matcher->match(view, options);
}
RegexResult search(Vector<RegexStringView> const views, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
{
if (!matcher || parser_result.error != Error::NoError)
return {};
AllOptions options = (AllOptions)regex_options.value_or({});
if ((options & AllFlags::MatchNotBeginOfLine) && (options & AllFlags::MatchNotEndOfLine)) {
options.reset_flag(AllFlags::MatchNotEndOfLine);
options.reset_flag(AllFlags::MatchNotBeginOfLine);
}
options.reset_flag(AllFlags::Internal_Stateful);
options |= AllFlags::Global;
return matcher->match(views, options);
}
bool match(RegexStringView const view, RegexResult& m, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
{
m = match(view, regex_options);
return m.success;
}
bool match(Vector<RegexStringView> const views, RegexResult& m, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
{
m = match(views, regex_options);
return m.success;
}
bool search(RegexStringView const view, RegexResult& m, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
{
m = search(view, regex_options);
return m.success;
}
bool search(Vector<RegexStringView> const views, RegexResult& m, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
{
m = search(views, regex_options);
return m.success;
}
bool has_match(RegexStringView const view, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
{
if (!matcher || parser_result.error != Error::NoError)
return false;
RegexResult result = matcher->match(view, AllOptions { regex_options.value_or({}) } | AllFlags::SkipSubExprResults);
return result.success;
}
bool has_match(Vector<RegexStringView> const views, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
{
if (!matcher || parser_result.error != Error::NoError)
return false;
RegexResult result = matcher->match(views, AllOptions { regex_options.value_or({}) } | AllFlags::SkipSubExprResults);
return result.success;
}
};
// free standing functions for match, search and has_match
// Free-standing form of Regex::match() for a single view; returns the full result.
// `pattern` is taken by const reference because only const members are called, so const
// Regex objects are accepted as well.
template<class Parser>
RegexResult match(RegexStringView const view, Regex<Parser> const& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
{
    return pattern.match(view, regex_options);
}
// Free-standing form of Regex::match() for several views; returns the full result.
// `pattern` is taken by const reference because only const members are called, so const
// Regex objects are accepted as well.
template<class Parser>
RegexResult match(Vector<RegexStringView> const view, Regex<Parser> const& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
{
    return pattern.match(view, regex_options);
}
template<class Parser>
bool match(RegexStringView const view, Regex<Parser>& pattern, RegexResult&, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
{
return pattern.match(view, regex_options);
}
template<class Parser>
bool match(Vector<RegexStringView> const view, Regex<Parser>& pattern, RegexResult&, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
{
return pattern.match(view, regex_options);
}
// Free-standing form of Regex::search() for a single view; returns the full result.
// `pattern` is taken by const reference because only const members are called, so const
// Regex objects are accepted as well.
template<class Parser>
RegexResult search(RegexStringView const view, Regex<Parser> const& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
{
    return pattern.search(view, regex_options);
}
// Free-standing form of Regex::search() for several views; returns the full result.
// `pattern` is taken by const reference because only const members are called, so const
// Regex objects are accepted as well.
template<class Parser>
RegexResult search(Vector<RegexStringView> const views, Regex<Parser> const& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
{
    return pattern.search(views, regex_options);
}
template<class Parser>
bool search(RegexStringView const view, Regex<Parser>& pattern, RegexResult&, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
{
return pattern.search(view, regex_options);
}
template<class Parser>
bool search(Vector<RegexStringView> const views, Regex<Parser>& pattern, RegexResult&, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
{
return pattern.search(views, regex_options);
}
template<class Parser>
bool has_match(RegexStringView const view, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
{
return pattern.has_match(view, regex_options);
}
// Free-standing form of Regex::has_match() for several views; returns only whether they match.
// `pattern` is taken by const reference because only const members are called, so const
// Regex objects are accepted as well.
template<class Parser>
bool has_match(Vector<RegexStringView> const views, Regex<Parser> const& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
{
    return pattern.has_match(views, regex_options);
}
}
// Make these names usable without the regex:: qualifier by anyone including this header.
// NOTE(review): regex::search is not re-exported here, so callers must qualify it — confirm
// whether that omission is intentional.
using regex::has_match;
using regex::match;
using regex::Regex;
using regex::RegexResult;