// Patch summary: keep already-parsed regular expressions in a per-executable table
// so that the NewRegExp bytecode instruction can reuse them.
//
// What the hunks below do:
//   * ASTCodegen.cpp: RegExpLiteral::generate_bytecode() still interns the pattern and
//     flags strings, and now also calls generator.intern_regex() with a
//     Bytecode::ParsedRegex built from m_parsed_regex / m_parsed_pattern / m_parsed_flags.
//     The returned index is passed as a third argument to generator.emit().
//   * Executable.h, Generator.h, Generator.cpp: add a regex table member next to the
//     existing string and identifier tables. Generator constructs it, exposes
//     intern_regex(), and Generator::generate() moves it into the Executable.
//   * Op.h: the NewRegExp constructor takes an additional RegexTableIndex, stored in
//     m_regex_index.
//   * Op.cpp: NewRegExp::execute_impl() no longer calls regexp_create(). It fetches the
//     ParsedRegex via regex_table->get(m_regex_index), constructs a Regex from its
//     regex / pattern / flags fields, creates the object with RegExpObject::create(),
//     then calls set_realm() and set_legacy_features_enabled(true) before storing the
//     object in the accumulator.
//   * RegexTable.h, RegexTable.cpp (new files): declare RegexTableIndex (a distinct
//     numeric type over size_t), the ParsedRegex struct (regex, pattern, flags) and the
//     non-copyable, non-movable RegexTable. insert() appends and returns the index of
//     the new last element, get() indexes the backing vector with index.value(), and
//     dump() prints "Regex Table:" followed by one "index: pattern" line per entry.
//   * CMakeLists.txt: adds Bytecode/RegexTable.cpp to SOURCES.
//
// NOTE(review): this copy of the patch looks extraction-damaged. Line breaks are
// collapsed, and angle-bracketed text seems to be missing (bare `#include`, `make()`,
// `Vector>`, `NonnullOwnPtr regex_table`). Confirm against the original commit before
// applying or quoting it.
// NOTE(review): in NewRegExp::execute_impl(), `realm` is already bound to
// *vm.current_realm(), yet set_realm() is passed *vm.current_realm() again. The local
// could presumably be reused; confirm with the author.
diff --git a/Userland/Libraries/LibJS/Bytecode/ASTCodegen.cpp b/Userland/Libraries/LibJS/Bytecode/ASTCodegen.cpp index 3e65242239..1dbad87d0e 100644 --- a/Userland/Libraries/LibJS/Bytecode/ASTCodegen.cpp +++ b/Userland/Libraries/LibJS/Bytecode/ASTCodegen.cpp @@ -289,7 +289,12 @@ Bytecode::CodeGenerationErrorOr RegExpLiteral::generate_bytecode(Bytecode: { auto source_index = generator.intern_string(m_pattern); auto flags_index = generator.intern_string(m_flags); - generator.emit(source_index, flags_index); + auto regex_index = generator.intern_regex(Bytecode::ParsedRegex { + .regex = m_parsed_regex, + .pattern = m_parsed_pattern, + .flags = m_parsed_flags, + }); + generator.emit(source_index, flags_index, regex_index); return {}; } diff --git a/Userland/Libraries/LibJS/Bytecode/Executable.h b/Userland/Libraries/LibJS/Bytecode/Executable.h index 3a9b817daa..103636d55e 100644 --- a/Userland/Libraries/LibJS/Bytecode/Executable.h +++ b/Userland/Libraries/LibJS/Bytecode/Executable.h @@ -11,6 +11,7 @@ #include #include #include +#include #include namespace JS::Bytecode { @@ -32,6 +33,7 @@ struct Executable { Vector> basic_blocks; NonnullOwnPtr string_table; NonnullOwnPtr identifier_table; + NonnullOwnPtr regex_table; size_t number_of_registers { 0 }; bool is_strict_mode { false }; diff --git a/Userland/Libraries/LibJS/Bytecode/Generator.cpp b/Userland/Libraries/LibJS/Bytecode/Generator.cpp index 834e26e92d..33cc703f4b 100644 --- a/Userland/Libraries/LibJS/Bytecode/Generator.cpp +++ b/Userland/Libraries/LibJS/Bytecode/Generator.cpp @@ -16,6 +16,7 @@ namespace JS::Bytecode { Generator::Generator() : m_string_table(make()) , m_identifier_table(make()) + , m_regex_table(make()) { } @@ -67,6 +68,7 @@ CodeGenerationErrorOr> Generator::generate(ASTNode con .basic_blocks = move(generator.m_root_basic_blocks), .string_table = move(generator.m_string_table), .identifier_table = move(generator.m_identifier_table), + .regex_table = move(generator.m_regex_table), .number_of_registers 
= generator.m_next_register, .is_strict_mode = is_strict_mode, }); diff --git a/Userland/Libraries/LibJS/Bytecode/Generator.h b/Userland/Libraries/LibJS/Bytecode/Generator.h index 772e81f5d5..15bf400bfe 100644 --- a/Userland/Libraries/LibJS/Bytecode/Generator.h +++ b/Userland/Libraries/LibJS/Bytecode/Generator.h @@ -18,6 +18,7 @@ #include #include #include +#include namespace JS::Bytecode { @@ -131,6 +132,11 @@ public: return m_string_table->insert(move(string)); } + RegexTableIndex intern_regex(ParsedRegex regex) + { + return m_regex_table->insert(move(regex)); + } + IdentifierTableIndex intern_identifier(DeprecatedFlyString string) { return m_identifier_table->insert(move(string)); @@ -220,6 +226,7 @@ private: Vector> m_root_basic_blocks; NonnullOwnPtr m_string_table; NonnullOwnPtr m_identifier_table; + NonnullOwnPtr m_regex_table; u32 m_next_register { 2 }; u32 m_next_block { 1 }; diff --git a/Userland/Libraries/LibJS/Bytecode/Op.cpp b/Userland/Libraries/LibJS/Bytecode/Op.cpp index 69c1a10e18..e928721a30 100644 --- a/Userland/Libraries/LibJS/Bytecode/Op.cpp +++ b/Userland/Libraries/LibJS/Bytecode/Op.cpp @@ -328,14 +328,30 @@ ThrowCompletionOr NewObject::execute_impl(Bytecode::Interpreter& interpret return {}; } +// 13.2.7.3 Runtime Semantics: Evaluation, https://tc39.es/ecma262/#sec-regular-expression-literals-runtime-semantics-evaluation ThrowCompletionOr NewRegExp::execute_impl(Bytecode::Interpreter& interpreter) const { auto& vm = interpreter.vm(); - auto source = interpreter.current_executable().get_string(m_source_index); + auto& realm = *vm.current_realm(); + + // 1. Let pattern be CodePointsToString(BodyText of RegularExpressionLiteral). + auto pattern = interpreter.current_executable().get_string(m_source_index); + + // 2. Let flags be CodePointsToString(FlagText of RegularExpressionLiteral). 
auto flags = interpreter.current_executable().get_string(m_flags_index); - interpreter.accumulator() = TRY(regexp_create(vm, PrimitiveString::create(vm, source), PrimitiveString::create(vm, flags))); + // 3. Return ! RegExpCreate(pattern, flags). + auto& parsed_regex = interpreter.current_executable().regex_table->get(m_regex_index); + Regex regex(parsed_regex.regex, parsed_regex.pattern, parsed_regex.flags); + // NOTE: We bypass RegExpCreate and subsequently RegExpAlloc as an optimization to use the already parsed values. + auto regexp_object = RegExpObject::create(realm, move(regex), move(pattern), move(flags)); + // RegExpAlloc has these two steps from the 'Legacy RegExp features' proposal. + regexp_object->set_realm(*vm.current_realm()); + // We don't need to check 'If SameValue(newTarget, thisRealm.[[Intrinsics]].[[%RegExp%]]) is true' + // here as we know RegExpCreate calls RegExpAlloc with %RegExp% for newTarget. + regexp_object->set_legacy_features_enabled(true); + interpreter.accumulator() = regexp_object; return {}; } diff --git a/Userland/Libraries/LibJS/Bytecode/Op.h b/Userland/Libraries/LibJS/Bytecode/Op.h index e419b79436..ff1b1747a2 100644 --- a/Userland/Libraries/LibJS/Bytecode/Op.h +++ b/Userland/Libraries/LibJS/Bytecode/Op.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -196,10 +197,11 @@ public: class NewRegExp final : public Instruction { public: - NewRegExp(StringTableIndex source_index, StringTableIndex flags_index) + NewRegExp(StringTableIndex source_index, StringTableIndex flags_index, RegexTableIndex regex_index) : Instruction(Type::NewRegExp) , m_source_index(source_index) , m_flags_index(flags_index) + , m_regex_index(regex_index) { } @@ -211,6 +213,7 @@ public: private: StringTableIndex m_source_index; StringTableIndex m_flags_index; + RegexTableIndex m_regex_index; }; #define JS_ENUMERATE_NEW_BUILTIN_ERROR_OPS(O) \ diff --git a/Userland/Libraries/LibJS/Bytecode/RegexTable.cpp 
b/Userland/Libraries/LibJS/Bytecode/RegexTable.cpp new file mode 100644 index 0000000000..42c29a4933 --- /dev/null +++ b/Userland/Libraries/LibJS/Bytecode/RegexTable.cpp @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2023, Andreas Kling + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include + +namespace JS::Bytecode { + +RegexTableIndex RegexTable::insert(ParsedRegex regex) +{ + m_regexes.append(move(regex)); + return m_regexes.size() - 1; +} + +ParsedRegex const& RegexTable::get(RegexTableIndex index) const +{ + return m_regexes[index.value()]; +} + +void RegexTable::dump() const +{ + outln("Regex Table:"); + for (size_t i = 0; i < m_regexes.size(); i++) + outln("{}: {}", i, m_regexes[i].pattern); +} + +} diff --git a/Userland/Libraries/LibJS/Bytecode/RegexTable.h b/Userland/Libraries/LibJS/Bytecode/RegexTable.h new file mode 100644 index 0000000000..6951d6f08b --- /dev/null +++ b/Userland/Libraries/LibJS/Bytecode/RegexTable.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2023, Andreas Kling + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include +#include +#include +#include + +namespace JS::Bytecode { + +AK_TYPEDEF_DISTINCT_NUMERIC_GENERAL(size_t, RegexTableIndex, Comparison); + +struct ParsedRegex { + regex::Parser::Result regex; + DeprecatedString pattern; + regex::RegexOptions flags; +}; + +class RegexTable { + AK_MAKE_NONMOVABLE(RegexTable); + AK_MAKE_NONCOPYABLE(RegexTable); + +public: + RegexTable() = default; + + RegexTableIndex insert(ParsedRegex); + ParsedRegex const& get(RegexTableIndex) const; + void dump() const; + bool is_empty() const { return m_regexes.is_empty(); } + +private: + Vector m_regexes; +}; + +} diff --git a/Userland/Libraries/LibJS/CMakeLists.txt b/Userland/Libraries/LibJS/CMakeLists.txt index a35daacd1d..0424c367f6 100644 --- a/Userland/Libraries/LibJS/CMakeLists.txt +++ b/Userland/Libraries/LibJS/CMakeLists.txt @@ -15,6 +15,7 @@ set(SOURCES Bytecode/Pass/MergeBlocks.cpp Bytecode/Pass/PlaceBlocks.cpp 
Bytecode/Pass/UnifySameBlocks.cpp + Bytecode/RegexTable.cpp Bytecode/StringTable.cpp Console.cpp Contrib/Test262/$262Object.cpp