mirror of
				https://github.com/RGBCube/serenity
				synced 2025-10-31 13:32:45 +00:00 
			
		
		
		
	LibJS/Bytecode: Don't reparse regular expressions on instantiation
The RegExpLiteral AST node already has the parsed regex::Parser::Result, so let's plumb that over to the bytecode executable instead of reparsing the regex every time NewRegExp is executed. This gives a ~12% speed-up on language/literals/regexp/S7.8.5_A2.1_T2.js in test262.
This commit is contained in:
		
							parent
							
								
									a098f38cab
								
							
						
					
					
						commit
						c0f985ffcf
					
				
					 9 changed files with 109 additions and 4 deletions
				
			
		|  | @ -289,7 +289,12 @@ Bytecode::CodeGenerationErrorOr<void> RegExpLiteral::generate_bytecode(Bytecode: | |||
| { | ||||
|     auto source_index = generator.intern_string(m_pattern); | ||||
|     auto flags_index = generator.intern_string(m_flags); | ||||
|     generator.emit<Bytecode::Op::NewRegExp>(source_index, flags_index); | ||||
|     auto regex_index = generator.intern_regex(Bytecode::ParsedRegex { | ||||
|         .regex = m_parsed_regex, | ||||
|         .pattern = m_parsed_pattern, | ||||
|         .flags = m_parsed_flags, | ||||
|     }); | ||||
|     generator.emit<Bytecode::Op::NewRegExp>(source_index, flags_index, regex_index); | ||||
|     return {}; | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -11,6 +11,7 @@ | |||
| #include <AK/WeakPtr.h> | ||||
| #include <LibJS/Bytecode/BasicBlock.h> | ||||
| #include <LibJS/Bytecode/IdentifierTable.h> | ||||
| #include <LibJS/Bytecode/RegexTable.h> | ||||
| #include <LibJS/Bytecode/StringTable.h> | ||||
| 
 | ||||
| namespace JS::Bytecode { | ||||
|  | @ -32,6 +33,7 @@ struct Executable { | |||
|     Vector<NonnullOwnPtr<BasicBlock>> basic_blocks; | ||||
|     NonnullOwnPtr<StringTable> string_table; | ||||
|     NonnullOwnPtr<IdentifierTable> identifier_table; | ||||
|     NonnullOwnPtr<RegexTable> regex_table; | ||||
|     size_t number_of_registers { 0 }; | ||||
|     bool is_strict_mode { false }; | ||||
| 
 | ||||
|  |  | |||
|  | @ -16,6 +16,7 @@ namespace JS::Bytecode { | |||
| Generator::Generator() | ||||
|     : m_string_table(make<StringTable>()) | ||||
|     , m_identifier_table(make<IdentifierTable>()) | ||||
|     , m_regex_table(make<RegexTable>()) | ||||
| { | ||||
| } | ||||
| 
 | ||||
|  | @ -67,6 +68,7 @@ CodeGenerationErrorOr<NonnullOwnPtr<Executable>> Generator::generate(ASTNode con | |||
|         .basic_blocks = move(generator.m_root_basic_blocks), | ||||
|         .string_table = move(generator.m_string_table), | ||||
|         .identifier_table = move(generator.m_identifier_table), | ||||
|         .regex_table = move(generator.m_regex_table), | ||||
|         .number_of_registers = generator.m_next_register, | ||||
|         .is_strict_mode = is_strict_mode, | ||||
|     }); | ||||
|  |  | |||
|  | @ -18,6 +18,7 @@ | |||
| #include <LibJS/Bytecode/StringTable.h> | ||||
| #include <LibJS/Forward.h> | ||||
| #include <LibJS/Runtime/FunctionKind.h> | ||||
| #include <LibRegex/Regex.h> | ||||
| 
 | ||||
| namespace JS::Bytecode { | ||||
| 
 | ||||
|  | @ -131,6 +132,11 @@ public: | |||
|         return m_string_table->insert(move(string)); | ||||
|     } | ||||
| 
 | ||||
|     RegexTableIndex intern_regex(ParsedRegex regex) | ||||
|     { | ||||
|         return m_regex_table->insert(move(regex)); | ||||
|     } | ||||
| 
 | ||||
|     IdentifierTableIndex intern_identifier(DeprecatedFlyString string) | ||||
|     { | ||||
|         return m_identifier_table->insert(move(string)); | ||||
|  | @ -220,6 +226,7 @@ private: | |||
|     Vector<NonnullOwnPtr<BasicBlock>> m_root_basic_blocks; | ||||
|     NonnullOwnPtr<StringTable> m_string_table; | ||||
|     NonnullOwnPtr<IdentifierTable> m_identifier_table; | ||||
|     NonnullOwnPtr<RegexTable> m_regex_table; | ||||
| 
 | ||||
|     u32 m_next_register { 2 }; | ||||
|     u32 m_next_block { 1 }; | ||||
|  |  | |||
|  | @ -328,14 +328,30 @@ ThrowCompletionOr<void> NewObject::execute_impl(Bytecode::Interpreter& interpret | |||
|     return {}; | ||||
| } | ||||
| 
 | ||||
| // 13.2.7.3 Runtime Semantics: Evaluation, https://tc39.es/ecma262/#sec-regular-expression-literals-runtime-semantics-evaluation
 | ||||
| ThrowCompletionOr<void> NewRegExp::execute_impl(Bytecode::Interpreter& interpreter) const | ||||
| { | ||||
|     auto& vm = interpreter.vm(); | ||||
| 
 | ||||
|     auto source = interpreter.current_executable().get_string(m_source_index); | ||||
|     auto& realm = *vm.current_realm(); | ||||
| 
 | ||||
|     // 1. Let pattern be CodePointsToString(BodyText of RegularExpressionLiteral).
 | ||||
|     auto pattern = interpreter.current_executable().get_string(m_source_index); | ||||
| 
 | ||||
|     // 2. Let flags be CodePointsToString(FlagText of RegularExpressionLiteral).
 | ||||
|     auto flags = interpreter.current_executable().get_string(m_flags_index); | ||||
| 
 | ||||
|     interpreter.accumulator() = TRY(regexp_create(vm, PrimitiveString::create(vm, source), PrimitiveString::create(vm, flags))); | ||||
|     // 3. Return ! RegExpCreate(pattern, flags).
 | ||||
|     auto& parsed_regex = interpreter.current_executable().regex_table->get(m_regex_index); | ||||
|     Regex<ECMA262> regex(parsed_regex.regex, parsed_regex.pattern, parsed_regex.flags); | ||||
|     // NOTE: We bypass RegExpCreate and subsequently RegExpAlloc as an optimization to use the already parsed values.
 | ||||
|     auto regexp_object = RegExpObject::create(realm, move(regex), move(pattern), move(flags)); | ||||
|     // RegExpAlloc has these two steps from the 'Legacy RegExp features' proposal.
 | ||||
|     regexp_object->set_realm(*vm.current_realm()); | ||||
|     // We don't need to check 'If SameValue(newTarget, thisRealm.[[Intrinsics]].[[%RegExp%]]) is true'
 | ||||
|     // here as we know RegExpCreate calls RegExpAlloc with %RegExp% for newTarget.
 | ||||
|     regexp_object->set_legacy_features_enabled(true); | ||||
|     interpreter.accumulator() = regexp_object; | ||||
|     return {}; | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -13,6 +13,7 @@ | |||
| #include <LibJS/Bytecode/IdentifierTable.h> | ||||
| #include <LibJS/Bytecode/Instruction.h> | ||||
| #include <LibJS/Bytecode/Label.h> | ||||
| #include <LibJS/Bytecode/RegexTable.h> | ||||
| #include <LibJS/Bytecode/Register.h> | ||||
| #include <LibJS/Bytecode/StringTable.h> | ||||
| #include <LibJS/Heap/Cell.h> | ||||
|  | @ -196,10 +197,11 @@ public: | |||
| 
 | ||||
| class NewRegExp final : public Instruction { | ||||
| public: | ||||
|     NewRegExp(StringTableIndex source_index, StringTableIndex flags_index) | ||||
|     NewRegExp(StringTableIndex source_index, StringTableIndex flags_index, RegexTableIndex regex_index) | ||||
|         : Instruction(Type::NewRegExp) | ||||
|         , m_source_index(source_index) | ||||
|         , m_flags_index(flags_index) | ||||
|         , m_regex_index(regex_index) | ||||
|     { | ||||
|     } | ||||
| 
 | ||||
|  | @ -211,6 +213,7 @@ public: | |||
| private: | ||||
|     StringTableIndex m_source_index; | ||||
|     StringTableIndex m_flags_index; | ||||
|     RegexTableIndex m_regex_index; | ||||
| }; | ||||
| 
 | ||||
| #define JS_ENUMERATE_NEW_BUILTIN_ERROR_OPS(O) \ | ||||
|  |  | |||
							
								
								
									
										29
									
								
								Userland/Libraries/LibJS/Bytecode/RegexTable.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										29
									
								
								Userland/Libraries/LibJS/Bytecode/RegexTable.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,29 @@ | |||
| /*
 | ||||
|  * Copyright (c) 2023, Andreas Kling <kling@serenityos.org> | ||||
|  * | ||||
|  * SPDX-License-Identifier: BSD-2-Clause | ||||
|  */ | ||||
| 
 | ||||
| #include <LibJS/Bytecode/RegexTable.h> | ||||
| 
 | ||||
| namespace JS::Bytecode { | ||||
| 
 | ||||
| RegexTableIndex RegexTable::insert(ParsedRegex regex) | ||||
| { | ||||
|     m_regexes.append(move(regex)); | ||||
|     return m_regexes.size() - 1; | ||||
| } | ||||
| 
 | ||||
| ParsedRegex const& RegexTable::get(RegexTableIndex index) const | ||||
| { | ||||
|     return m_regexes[index.value()]; | ||||
| } | ||||
| 
 | ||||
| void RegexTable::dump() const | ||||
| { | ||||
|     outln("Regex Table:"); | ||||
|     for (size_t i = 0; i < m_regexes.size(); i++) | ||||
|         outln("{}: {}", i, m_regexes[i].pattern); | ||||
| } | ||||
| 
 | ||||
| } | ||||
							
								
								
									
										40
									
								
								Userland/Libraries/LibJS/Bytecode/RegexTable.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										40
									
								
								Userland/Libraries/LibJS/Bytecode/RegexTable.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,40 @@ | |||
| /*
 | ||||
|  * Copyright (c) 2023, Andreas Kling <kling@serenityos.org> | ||||
|  * | ||||
|  * SPDX-License-Identifier: BSD-2-Clause | ||||
|  */ | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <AK/DeprecatedString.h> | ||||
| #include <AK/DistinctNumeric.h> | ||||
| #include <AK/Vector.h> | ||||
| #include <LibRegex/RegexParser.h> | ||||
| 
 | ||||
| namespace JS::Bytecode { | ||||
| 
 | ||||
| AK_TYPEDEF_DISTINCT_NUMERIC_GENERAL(size_t, RegexTableIndex, Comparison); | ||||
| 
 | ||||
| struct ParsedRegex { | ||||
|     regex::Parser::Result regex; | ||||
|     DeprecatedString pattern; | ||||
|     regex::RegexOptions<ECMAScriptFlags> flags; | ||||
| }; | ||||
| 
 | ||||
| class RegexTable { | ||||
|     AK_MAKE_NONMOVABLE(RegexTable); | ||||
|     AK_MAKE_NONCOPYABLE(RegexTable); | ||||
| 
 | ||||
| public: | ||||
|     RegexTable() = default; | ||||
| 
 | ||||
|     RegexTableIndex insert(ParsedRegex); | ||||
|     ParsedRegex const& get(RegexTableIndex) const; | ||||
|     void dump() const; | ||||
|     bool is_empty() const { return m_regexes.is_empty(); } | ||||
| 
 | ||||
| private: | ||||
|     Vector<ParsedRegex> m_regexes; | ||||
| }; | ||||
| 
 | ||||
| } | ||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Andreas Kling
						Andreas Kling