1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-26 19:27:45 +00:00

LibRegex: Take ownership of pattern string and fix move operations

The Regex object created a copy of the pattern string anyways, so tweak
the constructor to allow callers to move() pattern strings into the
regex.

The Regex move constructor and assignment operator currently result in
memory corruption. The Regex object stores a Matcher object, which holds
a reference to the Regex object. So when the Regex object is moved, that
reference is no longer valid. To fix this, the reference stored in the
Matcher must be updated when the Regex is moved.
This commit is contained in:
Timothy Flynn 2021-07-29 10:21:47 -04:00 committed by Linus Groh
parent 4a72b2c879
commit b162517065
2 changed files with 50 additions and 22 deletions

View file

@ -19,22 +19,45 @@ static RegexDebug s_regex_dbg(stderr);
#endif #endif
template<class Parser> template<class Parser>
Regex<Parser>::Regex(StringView pattern, typename ParserTraits<Parser>::OptionsType regex_options) Regex<Parser>::Regex(String pattern, typename ParserTraits<Parser>::OptionsType regex_options)
: pattern_value(move(pattern))
{ {
pattern_value = pattern.to_string(); regex::Lexer lexer(pattern_value);
regex::Lexer lexer(pattern);
Parser parser(lexer, regex_options); Parser parser(lexer, regex_options);
parser_result = parser.parse(); parser_result = parser.parse();
if (parser_result.error == regex::Error::NoError) if (parser_result.error == regex::Error::NoError)
matcher = make<Matcher<Parser>>(*this, regex_options); matcher = make<Matcher<Parser>>(this, regex_options);
}
template<class Parser>
Regex<Parser>::Regex(Regex&& regex)
: pattern_value(move(regex.pattern_value))
, parser_result(move(regex.parser_result))
, matcher(move(regex.matcher))
, start_offset(regex.start_offset)
{
if (matcher)
matcher->reset_pattern({}, this);
}
template<class Parser>
Regex<Parser>& Regex<Parser>::operator=(Regex&& regex)
{
pattern_value = move(regex.pattern_value);
parser_result = move(regex.parser_result);
matcher = move(regex.matcher);
if (matcher)
matcher->reset_pattern({}, this);
start_offset = regex.start_offset;
return *this;
} }
template<class Parser> template<class Parser>
typename ParserTraits<Parser>::OptionsType Regex<Parser>::options() const typename ParserTraits<Parser>::OptionsType Regex<Parser>::options() const
{ {
if (parser_result.error != Error::NoError) if (!matcher || parser_result.error != Error::NoError)
return {}; return {};
return matcher->options(); return matcher->options();
@ -71,7 +94,7 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const views, Optional
{ {
// If the pattern *itself* isn't stateful, reset any changes to start_offset. // If the pattern *itself* isn't stateful, reset any changes to start_offset.
if (!((AllFlags)m_regex_options.value() & AllFlags::Internal_Stateful)) if (!((AllFlags)m_regex_options.value() & AllFlags::Internal_Stateful))
m_pattern.start_offset = 0; m_pattern->start_offset = 0;
size_t match_count { 0 }; size_t match_count { 0 };
@ -80,7 +103,7 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const views, Optional
MatchOutput output; MatchOutput output;
input.regex_options = m_regex_options | regex_options.value_or({}).value(); input.regex_options = m_regex_options | regex_options.value_or({}).value();
input.start_offset = m_pattern.start_offset; input.start_offset = m_pattern->start_offset;
output.operations = 0; output.operations = 0;
size_t lines_to_skip = 0; size_t lines_to_skip = 0;
@ -107,8 +130,8 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const views, Optional
state.capture_group_matches.ensure_capacity(c_match_preallocation_count); state.capture_group_matches.ensure_capacity(c_match_preallocation_count);
state.named_capture_group_matches.ensure_capacity(c_match_preallocation_count); state.named_capture_group_matches.ensure_capacity(c_match_preallocation_count);
auto& capture_groups_count = m_pattern.parser_result.capture_groups_count; auto& capture_groups_count = m_pattern->parser_result.capture_groups_count;
auto& named_capture_groups_count = m_pattern.parser_result.named_capture_groups_count; auto& named_capture_groups_count = m_pattern->parser_result.named_capture_groups_count;
for (size_t j = 0; j < c_match_preallocation_count; ++j) { for (size_t j = 0; j < c_match_preallocation_count; ++j) {
state.matches.empend(); state.matches.empend();
@ -152,11 +175,11 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const views, Optional
dbgln_if(REGEX_DEBUG, "[match] Starting match with view ({}): _{}_", view.length(), view); dbgln_if(REGEX_DEBUG, "[match] Starting match with view ({}): _{}_", view.length(), view);
auto view_length = view.length(); auto view_length = view.length();
size_t view_index = m_pattern.start_offset; size_t view_index = m_pattern->start_offset;
state.string_position = view_index; state.string_position = view_index;
bool succeeded = false; bool succeeded = false;
if (view_index == view_length && m_pattern.parser_result.match_length_minimum == 0) { if (view_index == view_length && m_pattern->parser_result.match_length_minimum == 0) {
// Run the code until it tries to consume something. // Run the code until it tries to consume something.
// This allows non-consuming code to run on empty strings, for instance // This allows non-consuming code to run on empty strings, for instance
// e.g. "Exit" // e.g. "Exit"
@ -183,7 +206,7 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const views, Optional
} }
for (; view_index < view_length; ++view_index) { for (; view_index < view_length; ++view_index) {
auto& match_length_minimum = m_pattern.parser_result.match_length_minimum; auto& match_length_minimum = m_pattern->parser_result.match_length_minimum;
// FIXME: More performant would be to know the remaining minimum string // FIXME: More performant would be to know the remaining minimum string
// length needed to match from the current position onwards within // length needed to match from the current position onwards within
// the vm. Add new OpCode for MinMatchLengthFromSp with the value of // the vm. Add new OpCode for MinMatchLengthFromSp with the value of
@ -249,7 +272,7 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const views, Optional
input.global_offset += view.length() + 1; // +1 includes the line break character input.global_offset += view.length() + 1; // +1 includes the line break character
if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful)) if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful))
m_pattern.start_offset = state.string_position; m_pattern->start_offset = state.string_position;
if (succeeded && !continue_search) if (succeeded && !continue_search)
break; break;
@ -262,7 +285,7 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const views, Optional
if (output_copy.capture_group_matches.size() < match_count) if (output_copy.capture_group_matches.size() < match_count)
output_copy.capture_group_matches.resize(match_count); output_copy.capture_group_matches.resize(match_count);
for (auto& matches : output_copy.capture_group_matches) for (auto& matches : output_copy.capture_group_matches)
matches.resize(m_pattern.parser_result.capture_groups_count + 1); matches.resize(m_pattern->parser_result.capture_groups_count + 1);
if (!input.regex_options.has_flag_set(AllFlags::SkipTrimEmptyMatches)) { if (!input.regex_options.has_flag_set(AllFlags::SkipTrimEmptyMatches)) {
for (auto& matches : output_copy.capture_group_matches) for (auto& matches : output_copy.capture_group_matches)
matches.template remove_all_matching([](auto& match) { return match.view.is_null(); }); matches.template remove_all_matching([](auto& match) { return match.view.is_null(); });
@ -286,8 +309,8 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const views, Optional
move(output_copy.capture_group_matches), move(output_copy.capture_group_matches),
move(output_copy.named_capture_group_matches), move(output_copy.named_capture_group_matches),
output.operations, output.operations,
m_pattern.parser_result.capture_groups_count, m_pattern->parser_result.capture_groups_count,
m_pattern.parser_result.named_capture_groups_count, m_pattern->parser_result.named_capture_groups_count,
}; };
} }
@ -301,7 +324,7 @@ Optional<bool> Matcher<Parser>::execute(MatchInput const& input, MatchState& sta
MatchState fork_high_prio_state; MatchState fork_high_prio_state;
Optional<bool> success; Optional<bool> success;
auto& bytecode = m_pattern.parser_result.bytecode; auto& bytecode = m_pattern->parser_result.bytecode;
for (;;) { for (;;) {
++output.operations; ++output.operations;

View file

@ -44,7 +44,7 @@ template<class Parser>
class Matcher final { class Matcher final {
public: public:
Matcher(Regex<Parser> const& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) Matcher(Regex<Parser> const* pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
: m_pattern(pattern) : m_pattern(pattern)
, m_regex_options(regex_options.value_or({})) , m_regex_options(regex_options.value_or({}))
{ {
@ -59,11 +59,16 @@ public:
return m_regex_options; return m_regex_options;
} }
void reset_pattern(Badge<Regex<Parser>>, Regex<Parser> const* pattern)
{
m_pattern = pattern;
}
private: private:
Optional<bool> execute(MatchInput const& input, MatchState& state, MatchOutput& output, size_t recursion_level) const; Optional<bool> execute(MatchInput const& input, MatchState& state, MatchOutput& output, size_t recursion_level) const;
ALWAYS_INLINE Optional<bool> execute_low_prio_forks(MatchInput const& input, MatchState& original_state, MatchOutput& output, Vector<MatchState> states, size_t recursion_level) const; ALWAYS_INLINE Optional<bool> execute_low_prio_forks(MatchInput const& input, MatchState& original_state, MatchOutput& output, Vector<MatchState> states, size_t recursion_level) const;
Regex<Parser> const& m_pattern; Regex<Parser> const* m_pattern;
typename ParserTraits<Parser>::OptionsType const m_regex_options; typename ParserTraits<Parser>::OptionsType const m_regex_options;
}; };
@ -75,10 +80,10 @@ public:
OwnPtr<Matcher<Parser>> matcher { nullptr }; OwnPtr<Matcher<Parser>> matcher { nullptr };
mutable size_t start_offset { 0 }; mutable size_t start_offset { 0 };
explicit Regex(StringView pattern, typename ParserTraits<Parser>::OptionsType regex_options = {}); explicit Regex(String pattern, typename ParserTraits<Parser>::OptionsType regex_options = {});
~Regex() = default; ~Regex() = default;
Regex(Regex&&) = default; Regex(Regex&&);
Regex& operator=(Regex&&) = default; Regex& operator=(Regex&&);
typename ParserTraits<Parser>::OptionsType options() const; typename ParserTraits<Parser>::OptionsType options() const;
void print_bytecode(FILE* f = stdout) const; void print_bytecode(FILE* f = stdout) const;