From b162517065fb78639173b04dc050dfe84ddcebfd Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Thu, 29 Jul 2021 10:21:47 -0400 Subject: [PATCH] LibRegex: Take ownership of pattern string and fix move operations The Regex object created a copy of the pattern string anyways, so tweak the constructor to allow callers to move() pattern strings into the regex. The Regex move constructor and assignment operator currently result in memory corruption. The Regex object stores a Matcher object, which holds a reference to the Regex object. So when the Regex object is moved, that reference is no longer valid. To fix this, the reference stored in the Matcher must be updated when the Regex is moved. --- Userland/Libraries/LibRegex/RegexMatcher.cpp | 57 ++++++++++++++------ Userland/Libraries/LibRegex/RegexMatcher.h | 15 ++++-- 2 files changed, 50 insertions(+), 22 deletions(-) diff --git a/Userland/Libraries/LibRegex/RegexMatcher.cpp b/Userland/Libraries/LibRegex/RegexMatcher.cpp index f4a848741a..bd9680d41a 100644 --- a/Userland/Libraries/LibRegex/RegexMatcher.cpp +++ b/Userland/Libraries/LibRegex/RegexMatcher.cpp @@ -19,22 +19,45 @@ static RegexDebug s_regex_dbg(stderr); #endif template -Regex::Regex(StringView pattern, typename ParserTraits::OptionsType regex_options) +Regex::Regex(String pattern, typename ParserTraits::OptionsType regex_options) + : pattern_value(move(pattern)) { - pattern_value = pattern.to_string(); - regex::Lexer lexer(pattern); + regex::Lexer lexer(pattern_value); Parser parser(lexer, regex_options); parser_result = parser.parse(); if (parser_result.error == regex::Error::NoError) - matcher = make>(*this, regex_options); + matcher = make>(this, regex_options); +} + +template +Regex::Regex(Regex&& regex) + : pattern_value(move(regex.pattern_value)) + , parser_result(move(regex.parser_result)) + , matcher(move(regex.matcher)) + , start_offset(regex.start_offset) +{ + if (matcher) + matcher->reset_pattern({}, this); +} + +template +Regex& Regex::operator=(Regex&& regex) +{ + pattern_value = move(regex.pattern_value); + parser_result = move(regex.parser_result); + matcher = move(regex.matcher); + if (matcher) + matcher->reset_pattern({}, this); + start_offset = regex.start_offset; + return *this; } template typename ParserTraits::OptionsType Regex::options() const { - if (parser_result.error != Error::NoError) + if (!matcher || parser_result.error != Error::NoError) return {}; return matcher->options(); @@ -71,7 +94,7 @@ RegexResult Matcher::match(Vector const views, Optional { // If the pattern *itself* isn't stateful, reset any changes to start_offset. if (!((AllFlags)m_regex_options.value() & AllFlags::Internal_Stateful)) - m_pattern.start_offset = 0; + m_pattern->start_offset = 0; size_t match_count { 0 }; @@ -80,7 +103,7 @@ RegexResult Matcher::match(Vector const views, Optional MatchOutput output; input.regex_options = m_regex_options | regex_options.value_or({}).value(); - input.start_offset = m_pattern.start_offset; + input.start_offset = m_pattern->start_offset; output.operations = 0; size_t lines_to_skip = 0; @@ -107,8 +130,8 @@ RegexResult Matcher::match(Vector const views, Optional state.capture_group_matches.ensure_capacity(c_match_preallocation_count); state.named_capture_group_matches.ensure_capacity(c_match_preallocation_count); - auto& capture_groups_count = m_pattern.parser_result.capture_groups_count; - auto& named_capture_groups_count = m_pattern.parser_result.named_capture_groups_count; + auto& capture_groups_count = m_pattern->parser_result.capture_groups_count; + auto& named_capture_groups_count = m_pattern->parser_result.named_capture_groups_count; for (size_t j = 0; j < c_match_preallocation_count; ++j) { state.matches.empend(); @@ -152,11 +175,11 @@ RegexResult Matcher::match(Vector const views, Optional dbgln_if(REGEX_DEBUG, "[match] Starting match with view ({}): _{}_", view.length(), view); auto view_length = view.length(); - size_t view_index = m_pattern.start_offset; + size_t view_index = m_pattern->start_offset; state.string_position = view_index; bool succeeded = false; - if (view_index == view_length && m_pattern.parser_result.match_length_minimum == 0) { + if (view_index == view_length && m_pattern->parser_result.match_length_minimum == 0) { // Run the code until it tries to consume something. // This allows non-consuming code to run on empty strings, for instance // e.g. "Exit" @@ -183,7 +206,7 @@ RegexResult Matcher::match(Vector const views, Optional } for (; view_index < view_length; ++view_index) { - auto& match_length_minimum = m_pattern.parser_result.match_length_minimum; + auto& match_length_minimum = m_pattern->parser_result.match_length_minimum; // FIXME: More performant would be to know the remaining minimum string // length needed to match from the current position onwards within // the vm. Add new OpCode for MinMatchLengthFromSp with the value of @@ -249,7 +272,7 @@ RegexResult Matcher::match(Vector const views, Optional input.global_offset += view.length() + 1; // +1 includes the line break character if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful)) - m_pattern.start_offset = state.string_position; + m_pattern->start_offset = state.string_position; if (succeeded && !continue_search) break; @@ -262,7 +285,7 @@ RegexResult Matcher::match(Vector const views, Optional if (output_copy.capture_group_matches.size() < match_count) output_copy.capture_group_matches.resize(match_count); for (auto& matches : output_copy.capture_group_matches) - matches.resize(m_pattern.parser_result.capture_groups_count + 1); + matches.resize(m_pattern->parser_result.capture_groups_count + 1); if (!input.regex_options.has_flag_set(AllFlags::SkipTrimEmptyMatches)) { for (auto& matches : output_copy.capture_group_matches) matches.template remove_all_matching([](auto& match) { return match.view.is_null(); }); @@ -286,8 +309,8 @@ RegexResult Matcher::match(Vector const views, Optional move(output_copy.capture_group_matches), move(output_copy.named_capture_group_matches), output.operations, - m_pattern.parser_result.capture_groups_count, - m_pattern.parser_result.named_capture_groups_count, + m_pattern->parser_result.capture_groups_count, + m_pattern->parser_result.named_capture_groups_count, }; } @@ -301,7 +324,7 @@ Optional Matcher::execute(MatchInput const& input, MatchState& sta MatchState fork_high_prio_state; Optional success; - auto& bytecode = m_pattern.parser_result.bytecode; + auto& bytecode = m_pattern->parser_result.bytecode; for (;;) { ++output.operations; diff --git a/Userland/Libraries/LibRegex/RegexMatcher.h b/Userland/Libraries/LibRegex/RegexMatcher.h index 965fb02695..35e72962ce 100644 --- a/Userland/Libraries/LibRegex/RegexMatcher.h +++ b/Userland/Libraries/LibRegex/RegexMatcher.h @@ -44,7 +44,7 @@ template class Matcher final { public: - Matcher(Regex const& pattern, Optional::OptionsType> regex_options = {}) + Matcher(Regex const* pattern, Optional::OptionsType> regex_options = {}) : m_pattern(pattern) , m_regex_options(regex_options.value_or({})) { @@ -59,11 +59,16 @@ public: return m_regex_options; } + void reset_pattern(Badge>, Regex const* pattern) + { + m_pattern = pattern; + } + private: Optional execute(MatchInput const& input, MatchState& state, MatchOutput& output, size_t recursion_level) const; ALWAYS_INLINE Optional execute_low_prio_forks(MatchInput const& input, MatchState& original_state, MatchOutput& output, Vector states, size_t recursion_level) const; - Regex const& m_pattern; + Regex const* m_pattern; typename ParserTraits::OptionsType const m_regex_options; }; @@ -75,10 +80,10 @@ public: OwnPtr> matcher { nullptr }; mutable size_t start_offset { 0 }; - explicit Regex(StringView pattern, typename ParserTraits::OptionsType regex_options = {}); + explicit Regex(String pattern, typename ParserTraits::OptionsType regex_options = {}); ~Regex() = default; - Regex(Regex&&) = default; - Regex& operator=(Regex&&) = default; + Regex(Regex&&); + Regex& operator=(Regex&&); typename ParserTraits::OptionsType options() const; void print_bytecode(FILE* f = stdout) const;