mirror of
				https://github.com/RGBCube/serenity
				synced 2025-10-25 01:32:31 +00:00 
			
		
		
		
	 18f4b6c670
			
		
	
	
		18f4b6c670
		
	
	
	
	
		
			
			This switches to using a simple string equality check if the regex pattern is strictly a string literal. Technically, this optimisation can also be made on bounded literal patterns like /[abc]def/ or /abc|def/, but those are significantly more complex to implement due to our bytecode-only approach.
		
			
				
	
	
		
			302 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			302 lines
		
	
	
	
		
			11 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /*
 | |
|  * Copyright (c) 2020, Emanuel Sprung <emanuel.sprung@gmail.com>
 | |
|  *
 | |
|  * SPDX-License-Identifier: BSD-2-Clause
 | |
|  */
 | |
| 
 | |
| #pragma once
 | |
| 
 | |
| #include "RegexByteCode.h"
 | |
| #include "RegexMatch.h"
 | |
| #include "RegexOptions.h"
 | |
| #include "RegexParser.h"
 | |
| 
 | |
| #include <AK/Forward.h>
 | |
| #include <AK/GenericLexer.h>
 | |
| #include <AK/HashMap.h>
 | |
| #include <AK/Types.h>
 | |
| #include <AK/Utf32View.h>
 | |
| #include <AK/Vector.h>
 | |
| #include <ctype.h>
 | |
| 
 | |
| #include <stdio.h>
 | |
| 
 | |
| namespace regex {
 | |
| 
 | |
| namespace Detail {
 | |
| 
 | |
// A [start, end] pair of positions; the element type of Regex::BasicBlockList.
// NOTE(review): presumably bytecode offsets delimiting one basic block, and
// whether `end` is inclusive is not visible from this header - confirm
// against split_basic_blocks().
struct Block {
    size_t start; // Position where the block begins.
    size_t end;   // Position where the block ends.
};
 | |
| 
 | |
| }
 | |
| 
 | |
// NOTE(review): presumably the upper bound on matcher recursion depth; the
// users of this constant live outside this header.
static constexpr size_t c_max_recursion = 5000;
// NOTE(review): presumably how many match slots get reserved up front
// (currently none); the users of this constant live outside this header.
static constexpr size_t c_match_preallocation_count = 0;
 | |
| 
 | |
| struct RegexResult final {
 | |
|     bool success { false };
 | |
|     size_t count { 0 };
 | |
|     Vector<Match> matches;
 | |
|     Vector<Vector<Match>> capture_group_matches;
 | |
|     size_t n_operations { 0 };
 | |
|     size_t n_capture_groups { 0 };
 | |
|     size_t n_named_capture_groups { 0 };
 | |
| };
 | |
| 
 | |
| template<class Parser>
 | |
| class Regex;
 | |
| 
 | |
| template<class Parser>
 | |
| class Matcher final {
 | |
| 
 | |
| public:
 | |
|     Matcher(Regex<Parser> const* pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
 | |
|         : m_pattern(pattern)
 | |
|         , m_regex_options(regex_options.value_or({}))
 | |
|     {
 | |
|     }
 | |
|     ~Matcher() = default;
 | |
| 
 | |
|     RegexResult match(RegexStringView, Optional<typename ParserTraits<Parser>::OptionsType> = {}) const;
 | |
|     RegexResult match(Vector<RegexStringView> const&, Optional<typename ParserTraits<Parser>::OptionsType> = {}) const;
 | |
| 
 | |
|     typename ParserTraits<Parser>::OptionsType options() const
 | |
|     {
 | |
|         return m_regex_options;
 | |
|     }
 | |
| 
 | |
|     void reset_pattern(Badge<Regex<Parser>>, Regex<Parser> const* pattern)
 | |
|     {
 | |
|         m_pattern = pattern;
 | |
|     }
 | |
| 
 | |
| private:
 | |
|     bool execute(MatchInput const& input, MatchState& state, size_t& operations) const;
 | |
| 
 | |
|     Regex<Parser> const* m_pattern;
 | |
|     typename ParserTraits<Parser>::OptionsType const m_regex_options;
 | |
| };
 | |
| 
 | |
| template<class Parser>
 | |
| class Regex final {
 | |
| public:
 | |
|     DeprecatedString pattern_value;
 | |
|     regex::Parser::Result parser_result;
 | |
|     OwnPtr<Matcher<Parser>> matcher { nullptr };
 | |
|     mutable size_t start_offset { 0 };
 | |
| 
 | |
|     static regex::Parser::Result parse_pattern(StringView pattern, typename ParserTraits<Parser>::OptionsType regex_options = {});
 | |
| 
 | |
|     explicit Regex(DeprecatedString pattern, typename ParserTraits<Parser>::OptionsType regex_options = {});
 | |
|     Regex(regex::Parser::Result parse_result, DeprecatedString pattern, typename ParserTraits<Parser>::OptionsType regex_options = {});
 | |
|     ~Regex() = default;
 | |
|     Regex(Regex&&);
 | |
|     Regex& operator=(Regex&&);
 | |
| 
 | |
|     typename ParserTraits<Parser>::OptionsType options() const;
 | |
|     DeprecatedString error_string(Optional<DeprecatedString> message = {}) const;
 | |
| 
 | |
|     RegexResult match(RegexStringView view, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
 | |
|     {
 | |
|         if (!matcher || parser_result.error != Error::NoError)
 | |
|             return {};
 | |
|         return matcher->match(view, regex_options);
 | |
|     }
 | |
| 
 | |
|     RegexResult match(Vector<RegexStringView> const& views, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
 | |
|     {
 | |
|         if (!matcher || parser_result.error != Error::NoError)
 | |
|             return {};
 | |
|         return matcher->match(views, regex_options);
 | |
|     }
 | |
| 
 | |
|     DeprecatedString replace(RegexStringView view, StringView replacement_pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
 | |
|     {
 | |
|         if (!matcher || parser_result.error != Error::NoError)
 | |
|             return {};
 | |
| 
 | |
|         StringBuilder builder;
 | |
|         size_t start_offset = 0;
 | |
|         RegexResult result = matcher->match(view, regex_options);
 | |
|         if (!result.success)
 | |
|             return view.to_deprecated_string();
 | |
| 
 | |
|         for (size_t i = 0; i < result.matches.size(); ++i) {
 | |
|             auto& match = result.matches[i];
 | |
|             builder.append(view.substring_view(start_offset, match.global_offset - start_offset).to_deprecated_string());
 | |
|             start_offset = match.global_offset + match.view.length();
 | |
|             GenericLexer lexer(replacement_pattern);
 | |
|             while (!lexer.is_eof()) {
 | |
|                 if (lexer.consume_specific('\\')) {
 | |
|                     if (lexer.consume_specific('\\')) {
 | |
|                         builder.append('\\');
 | |
|                         continue;
 | |
|                     }
 | |
|                     auto number = lexer.consume_while(isdigit);
 | |
|                     if (auto index = number.to_uint(); index.has_value() && result.n_capture_groups >= index.value()) {
 | |
|                         builder.append(result.capture_group_matches[i][index.value() - 1].view.to_deprecated_string());
 | |
|                     } else {
 | |
|                         builder.appendff("\\{}", number);
 | |
|                     }
 | |
|                 } else {
 | |
|                     builder.append(lexer.consume_while([](auto ch) { return ch != '\\'; }));
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         builder.append(view.substring_view(start_offset, view.length() - start_offset).to_deprecated_string());
 | |
| 
 | |
|         return builder.to_deprecated_string();
 | |
|     }
 | |
| 
 | |
|     // FIXME: replace(Vector<RegexStringView> const , ...)
 | |
| 
 | |
|     RegexResult search(RegexStringView view, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
 | |
|     {
 | |
|         if (!matcher || parser_result.error != Error::NoError)
 | |
|             return {};
 | |
| 
 | |
|         AllOptions options = (AllOptions)regex_options.value_or({});
 | |
|         if ((options & AllFlags::MatchNotBeginOfLine) && (options & AllFlags::MatchNotEndOfLine)) {
 | |
|             options.reset_flag(AllFlags::MatchNotEndOfLine);
 | |
|             options.reset_flag(AllFlags::MatchNotBeginOfLine);
 | |
|         }
 | |
|         options.reset_flag(AllFlags::Internal_Stateful);
 | |
|         options |= AllFlags::Global;
 | |
| 
 | |
|         return matcher->match(view, options);
 | |
|     }
 | |
| 
 | |
|     RegexResult search(Vector<RegexStringView> const& views, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
 | |
|     {
 | |
|         if (!matcher || parser_result.error != Error::NoError)
 | |
|             return {};
 | |
| 
 | |
|         AllOptions options = (AllOptions)regex_options.value_or({});
 | |
|         if ((options & AllFlags::MatchNotBeginOfLine) && (options & AllFlags::MatchNotEndOfLine)) {
 | |
|             options.reset_flag(AllFlags::MatchNotEndOfLine);
 | |
|             options.reset_flag(AllFlags::MatchNotBeginOfLine);
 | |
|         }
 | |
|         options.reset_flag(AllFlags::Internal_Stateful);
 | |
|         options |= AllFlags::Global;
 | |
| 
 | |
|         return matcher->match(views, options);
 | |
|     }
 | |
| 
 | |
|     bool match(RegexStringView view, RegexResult& m, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
 | |
|     {
 | |
|         m = match(view, regex_options);
 | |
|         return m.success;
 | |
|     }
 | |
| 
 | |
|     bool match(Vector<RegexStringView> const& views, RegexResult& m, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
 | |
|     {
 | |
|         m = match(views, regex_options);
 | |
|         return m.success;
 | |
|     }
 | |
| 
 | |
|     bool search(RegexStringView view, RegexResult& m, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
 | |
|     {
 | |
|         m = search(view, regex_options);
 | |
|         return m.success;
 | |
|     }
 | |
| 
 | |
|     bool search(Vector<RegexStringView> const& views, RegexResult& m, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
 | |
|     {
 | |
|         m = search(views, regex_options);
 | |
|         return m.success;
 | |
|     }
 | |
| 
 | |
|     bool has_match(RegexStringView view, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
 | |
|     {
 | |
|         if (!matcher || parser_result.error != Error::NoError)
 | |
|             return false;
 | |
|         RegexResult result = matcher->match(view, AllOptions { regex_options.value_or({}) } | AllFlags::SkipSubExprResults);
 | |
|         return result.success;
 | |
|     }
 | |
| 
 | |
|     bool has_match(Vector<RegexStringView> const& views, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
 | |
|     {
 | |
|         if (!matcher || parser_result.error != Error::NoError)
 | |
|             return false;
 | |
|         RegexResult result = matcher->match(views, AllOptions { regex_options.value_or({}) } | AllFlags::SkipSubExprResults);
 | |
|         return result.success;
 | |
|     }
 | |
| 
 | |
|     using BasicBlockList = Vector<Detail::Block>;
 | |
|     static BasicBlockList split_basic_blocks(ByteCode const&);
 | |
| 
 | |
| private:
 | |
|     void run_optimization_passes();
 | |
|     void attempt_rewrite_loops_as_atomic_groups(BasicBlockList const&);
 | |
|     bool attempt_rewrite_entire_match_as_substring_search(BasicBlockList const&);
 | |
| };
 | |
| 
 | |
| // free standing functions for match, search and has_match
 | |
| template<class Parser>
 | |
| RegexResult match(RegexStringView view, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
 | |
| {
 | |
|     return pattern.match(view, regex_options);
 | |
| }
 | |
| 
 | |
| template<class Parser>
 | |
| RegexResult match(Vector<RegexStringView> const& view, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
 | |
| {
 | |
|     return pattern.match(view, regex_options);
 | |
| }
 | |
| 
 | |
| template<class Parser>
 | |
| bool match(RegexStringView view, Regex<Parser>& pattern, RegexResult&, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
 | |
| {
 | |
|     return pattern.match(view, regex_options);
 | |
| }
 | |
| 
 | |
| template<class Parser>
 | |
| bool match(Vector<RegexStringView> const& view, Regex<Parser>& pattern, RegexResult&, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
 | |
| {
 | |
|     return pattern.match(view, regex_options);
 | |
| }
 | |
| 
 | |
| template<class Parser>
 | |
| RegexResult search(RegexStringView view, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
 | |
| {
 | |
|     return pattern.search(view, regex_options);
 | |
| }
 | |
| 
 | |
| template<class Parser>
 | |
| RegexResult search(Vector<RegexStringView> const& views, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
 | |
| {
 | |
|     return pattern.search(views, regex_options);
 | |
| }
 | |
| 
 | |
| template<class Parser>
 | |
| bool search(RegexStringView view, Regex<Parser>& pattern, RegexResult&, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
 | |
| {
 | |
|     return pattern.search(view, regex_options);
 | |
| }
 | |
| 
 | |
| template<class Parser>
 | |
| bool search(Vector<RegexStringView> const& views, Regex<Parser>& pattern, RegexResult&, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
 | |
| {
 | |
|     return pattern.search(views, regex_options);
 | |
| }
 | |
| 
 | |
| template<class Parser>
 | |
| bool has_match(RegexStringView view, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
 | |
| {
 | |
|     return pattern.has_match(view, regex_options);
 | |
| }
 | |
| 
 | |
| template<class Parser>
 | |
| bool has_match(Vector<RegexStringView> const& views, Regex<Parser>& pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {})
 | |
| {
 | |
|     return pattern.has_match(views, regex_options);
 | |
| }
 | |
| }
 | |
| 
 | |
| using regex::has_match;
 | |
| using regex::match;
 | |
| using regex::Regex;
 | |
| using regex::RegexResult;
 |