From f39b6ae3c672d639f65af4a296bcfd7fca43e5fe Mon Sep 17 00:00:00 2001 From: Andreas Kling Date: Wed, 19 Oct 2022 15:35:35 +0200 Subject: [PATCH] LibJS: Avoid expensive UTF-8/16 conversion in legacy RegExp properties Let's not incur the cost of a synchronous conversion to UTF-8 for all the legacy static properties after running a regular expression. The SunSpider subtest regexp-dna goes from taking ~25 sec to ~0.7 sec on my machine. --- .../Runtime/RegExpLegacyStaticProperties.cpp | 38 ++++---- .../Runtime/RegExpLegacyStaticProperties.h | 91 ++++++++++--------- .../LibJS/Runtime/RegExpPrototype.cpp | 9 +- 3 files changed, 70 insertions(+), 68 deletions(-) diff --git a/Userland/Libraries/LibJS/Runtime/RegExpLegacyStaticProperties.cpp b/Userland/Libraries/LibJS/Runtime/RegExpLegacyStaticProperties.cpp index 0682234f6c..26f16602bb 100644 --- a/Userland/Libraries/LibJS/Runtime/RegExpLegacyStaticProperties.cpp +++ b/Userland/Libraries/LibJS/Runtime/RegExpLegacyStaticProperties.cpp @@ -29,7 +29,7 @@ void RegExpLegacyStaticProperties::invalidate() } // GetLegacyRegExpStaticProperty( C, thisValue, internalSlotName ), https://github.com/tc39/proposal-regexp-legacy-features#getlegacyregexpstaticproperty-c-thisvalue-internalslotname- -ThrowCompletionOr get_legacy_regexp_static_property(VM& vm, RegExpConstructor& constructor, Value this_value, Optional const& (RegExpLegacyStaticProperties::*property_getter)() const) +ThrowCompletionOr get_legacy_regexp_static_property(VM& vm, RegExpConstructor& constructor, Value this_value, Optional const& (RegExpLegacyStaticProperties::*property_getter)() const) { // 1. Assert C is an object that has an internal slot named internalSlotName. @@ -49,7 +49,7 @@ ThrowCompletionOr get_legacy_regexp_static_property(VM& vm, RegExpConstru } // SetLegacyRegExpStaticProperty( C, thisValue, internalSlotName, val ), https://github.com/tc39/proposal-regexp-legacy-features#setlegacyregexpstaticproperty-c-thisvalue-internalslotname-val- -ThrowCompletionOr set_legacy_regexp_static_property(VM& vm, RegExpConstructor& constructor, Value this_value, void (RegExpLegacyStaticProperties::*property_setter)(String), Value value) +ThrowCompletionOr set_legacy_regexp_static_property(VM& vm, RegExpConstructor& constructor, Value this_value, void (RegExpLegacyStaticProperties::*property_setter)(Utf16String), Value value) { // 1. Assert C is an object that has an internal slot named internalSlotName. @@ -58,7 +58,7 @@ ThrowCompletionOr set_legacy_regexp_static_property(VM& vm, RegExpConstruc return vm.throw_completion(ErrorType::SetLegacyRegExpStaticPropertyThisValueMismatch); // 3. Let strVal be ? ToString(val). - auto str_value = TRY(value.to_string(vm)); + auto str_value = TRY(value.to_utf16_string(vm)); // 4. Set the value of the internal slot of C named internalSlotName to strVal. (constructor.legacy_static_properties().*property_setter)(str_value); @@ -67,7 +67,7 @@ ThrowCompletionOr set_legacy_regexp_static_property(VM& vm, RegExpConstruc } // UpdateLegacyRegExpStaticProperties ( C, S, startIndex, endIndex, capturedValues ), https://github.com/tc39/proposal-regexp-legacy-features#updatelegacyregexpstaticproperties--c-s-startindex-endindex-capturedvalues- -void update_legacy_regexp_static_properties(RegExpConstructor& constructor, Utf16String const& string, size_t start_index, size_t end_index, Vector const& captured_values) +void update_legacy_regexp_static_properties(RegExpConstructor& constructor, Utf16String const& string, size_t start_index, size_t end_index, Vector const& captured_values) { auto& legacy_static_properties = constructor.legacy_static_properties(); @@ -87,11 +87,11 @@ void update_legacy_regexp_static_properties(RegExpConstructor& constructor, Utf1 auto group_count = captured_values.size(); // 7. Set the value of C’s [[RegExpInput]] internal slot to S. - legacy_static_properties.set_input(string.to_utf8()); + legacy_static_properties.set_input(string); // 8. Set the value of C’s [[RegExpLastMatch]] internal slot to a String whose length is endIndex - startIndex and containing the code units from S with indices startIndex through endIndex - 1, in ascending order. auto last_match = string.view().substring_view(start_index, end_index - start_index); - legacy_static_properties.set_last_match(last_match.to_utf8()); + legacy_static_properties.set_last_match(Utf16String(last_match)); // 9. If n > 0, set the value of C’s [[RegExpLastParen]] internal slot to the last element of capturedValues. if (group_count > 0) { @@ -100,20 +100,20 @@ void update_legacy_regexp_static_properties(RegExpConstructor& constructor, Utf1 } // 10. Else, set the value of C’s [[RegExpLastParen]] internal slot to the empty String. else { - legacy_static_properties.set_last_paren(String::empty()); + legacy_static_properties.set_last_paren(Utf16String(""sv)); } // 11. Set the value of C’s [[RegExpLeftContext]] internal slot to a String whose length is startIndex and containing the code units from S with indices 0 through startIndex - 1, in ascending order. auto left_context = string.view().substring_view(0, start_index); - legacy_static_properties.set_left_context(left_context.to_utf8()); + legacy_static_properties.set_left_context(Utf16String(left_context)); // 12. Set the value of C’s [[RegExpRightContext]] internal slot to a String whose length is len - endIndex and containing the code units from S with indices endIndex through len - 1, in ascending order. auto right_context = string.view().substring_view(end_index, len - end_index); - legacy_static_properties.set_right_context(right_context.to_utf8()); + legacy_static_properties.set_right_context(Utf16String(right_context)); // 13. For each integer i such that 1 ≤ i ≤ 9 for (size_t i = 1; i <= 9; i++) { - auto value = String::empty(); + auto value = Utf16String(""sv); // If i ≤ n, set the value of C’s [[RegExpPareni]] internal slot to the ith element of capturedValues. if (i <= group_count) { value = captured_values[i - 1]; @@ -124,23 +124,23 @@ void update_legacy_regexp_static_properties(RegExpConstructor& constructor, Utf1 } if (i == 1) { - legacy_static_properties.set_$1(value); + legacy_static_properties.set_$1(Utf16String(value)); } else if (i == 2) { - legacy_static_properties.set_$2(value); + legacy_static_properties.set_$2(Utf16String(value)); } else if (i == 3) { - legacy_static_properties.set_$3(value); + legacy_static_properties.set_$3(Utf16String(value)); } else if (i == 4) { - legacy_static_properties.set_$4(value); + legacy_static_properties.set_$4(Utf16String(value)); } else if (i == 5) { - legacy_static_properties.set_$5(value); + legacy_static_properties.set_$5(Utf16String(value)); } else if (i == 6) { - legacy_static_properties.set_$6(value); + legacy_static_properties.set_$6(Utf16String(value)); } else if (i == 7) { - legacy_static_properties.set_$7(value); + legacy_static_properties.set_$7(Utf16String(value)); } else if (i == 8) { - legacy_static_properties.set_$8(value); + legacy_static_properties.set_$8(Utf16String(value)); } else if (i == 9) { - legacy_static_properties.set_$9(value); + legacy_static_properties.set_$9(Utf16String(value)); } } } diff --git a/Userland/Libraries/LibJS/Runtime/RegExpLegacyStaticProperties.h b/Userland/Libraries/LibJS/Runtime/RegExpLegacyStaticProperties.h index e036fb7e67..2216c97c96 100644 --- a/Userland/Libraries/LibJS/Runtime/RegExpLegacyStaticProperties.h +++ b/Userland/Libraries/LibJS/Runtime/RegExpLegacyStaticProperties.h @@ -9,6 +9,7 @@ #include #include #include +#include namespace JS { @@ -22,57 +23,57 @@ namespace JS { // [[RegExpParen1]] ... [[RegExpParen9]] class RegExpLegacyStaticProperties { public: - Optional const& input() const { return m_input; } - Optional const& last_match() const { return m_last_match; } - Optional const& last_paren() const { return m_last_paren; } - Optional const& left_context() const { return m_left_context; } - Optional const& right_context() const { return m_right_context; } - Optional const& $1() const { return m_$1; } - Optional const& $2() const { return m_$2; } - Optional const& $3() const { return m_$3; } - Optional const& $4() const { return m_$4; } - Optional const& $5() const { return m_$5; } - Optional const& $6() const { return m_$6; } - Optional const& $7() const { return m_$7; } - Optional const& $8() const { return m_$8; } - Optional const& $9() const { return m_$9; } + Optional const& input() const { return m_input; } + Optional const& last_match() const { return m_last_match; } + Optional const& last_paren() const { return m_last_paren; } + Optional const& left_context() const { return m_left_context; } + Optional const& right_context() const { return m_right_context; } + Optional const& $1() const { return m_$1; } + Optional const& $2() const { return m_$2; } + Optional const& $3() const { return m_$3; } + Optional const& $4() const { return m_$4; } + Optional const& $5() const { return m_$5; } + Optional const& $6() const { return m_$6; } + Optional const& $7() const { return m_$7; } + Optional const& $8() const { return m_$8; } + Optional const& $9() const { return m_$9; } - void set_input(String input) { m_input = move(input); } - void set_last_match(String last_match) { m_last_match = move(last_match); } - void set_last_paren(String last_paren) { m_last_paren = move(last_paren); } - void set_left_context(String left_context) { m_left_context = move(left_context); } - void set_right_context(String right_context) { m_right_context = move(right_context); } - void set_$1(String value) { m_$1 = move(value); } - void set_$2(String value) { m_$2 = move(value); } - void set_$3(String value) { m_$3 = move(value); } - void set_$4(String value) { m_$4 = move(value); } - void set_$5(String value) { m_$5 = move(value); } - void set_$6(String value) { m_$6 = move(value); } - void set_$7(String value) { m_$7 = move(value); } - void set_$8(String value) { m_$8 = move(value); } - void set_$9(String value) { m_$9 = move(value); } + void set_input(Utf16String input) { m_input = move(input); } + void set_last_match(Utf16String last_match) { m_last_match = move(last_match); } + void set_last_paren(Utf16String last_paren) { m_last_paren = move(last_paren); } + void set_left_context(Utf16String left_context) { m_left_context = move(left_context); } + void set_right_context(Utf16String right_context) { m_right_context = move(right_context); } + void set_$1(Utf16String value) { m_$1 = move(value); } + void set_$2(Utf16String value) { m_$2 = move(value); } + void set_$3(Utf16String value) { m_$3 = move(value); } + void set_$4(Utf16String value) { m_$4 = move(value); } + void set_$5(Utf16String value) { m_$5 = move(value); } + void set_$6(Utf16String value) { m_$6 = move(value); } + void set_$7(Utf16String value) { m_$7 = move(value); } + void set_$8(Utf16String value) { m_$8 = move(value); } + void set_$9(Utf16String value) { m_$9 = move(value); } void invalidate(); private: - Optional m_input; - Optional m_last_match; - Optional m_last_paren; - Optional m_left_context; - Optional m_right_context; - Optional m_$1; - Optional m_$2; - Optional m_$3; - Optional m_$4; - Optional m_$5; - Optional m_$6; - Optional m_$7; - Optional m_$8; - Optional m_$9; + Optional m_input; + Optional m_last_match; + Optional m_last_paren; + Optional m_left_context; + Optional m_right_context; + Optional m_$1; + Optional m_$2; + Optional m_$3; + Optional m_$4; + Optional m_$5; + Optional m_$6; + Optional m_$7; + Optional m_$8; + Optional m_$9; }; -ThrowCompletionOr set_legacy_regexp_static_property(VM& vm, RegExpConstructor& constructor, Value this_value, void (RegExpLegacyStaticProperties::*property_setter)(String), Value value); -ThrowCompletionOr get_legacy_regexp_static_property(VM& vm, RegExpConstructor& constructor, Value this_value, Optional const& (RegExpLegacyStaticProperties::*property_getter)() const); -void update_legacy_regexp_static_properties(RegExpConstructor& constructor, Utf16String const& string, size_t start_index, size_t end_index, Vector const& captured_values); +ThrowCompletionOr set_legacy_regexp_static_property(VM& vm, RegExpConstructor& constructor, Value this_value, void (RegExpLegacyStaticProperties::*property_setter)(Utf16String), Value value); +ThrowCompletionOr get_legacy_regexp_static_property(VM& vm, RegExpConstructor& constructor, Value this_value, Optional const& (RegExpLegacyStaticProperties::*property_getter)() const); +void update_legacy_regexp_static_properties(RegExpConstructor& constructor, Utf16String const& string, size_t start_index, size_t end_index, Vector const& captured_values); void invalidate_legacy_regexp_static_properties(RegExpConstructor& constructor); } diff --git a/Userland/Libraries/LibJS/Runtime/RegExpPrototype.cpp b/Userland/Libraries/LibJS/Runtime/RegExpPrototype.cpp index c8260a3011..3014fe94e3 100644 --- a/Userland/Libraries/LibJS/Runtime/RegExpPrototype.cpp +++ b/Userland/Libraries/LibJS/Runtime/RegExpPrototype.cpp @@ -265,7 +265,7 @@ static ThrowCompletionOr regexp_builtin_exec(VM& vm, RegExpObject& regexp // 24. Let indices be a new empty List. Vector> indices; - Vector captured_values; + Vector captured_values; // 25. Let groupNames be a new empty List. HashMap group_names; @@ -300,7 +300,7 @@ static ThrowCompletionOr regexp_builtin_exec(VM& vm, RegExpObject& regexp // ii. Append undefined to indices. indices.append({}); // iii. Append capture to indices. - captured_values.append(String::empty()); + captured_values.append(Utf16String(""sv)); } // c. Else, else { @@ -311,11 +311,12 @@ static ThrowCompletionOr regexp_builtin_exec(VM& vm, RegExpObject& regexp // 2. Set captureEnd to ! GetStringIndex(S, Input, captureEnd). // iv. Let capture be the Match { [[StartIndex]]: captureStart, [[EndIndex]: captureEnd }. // v. Let capturedValue be ! GetMatchString(S, capture). - captured_value = js_string(vm, capture.view.u16_view()); + auto capture_as_utf16_string = Utf16String(capture.view.u16_view()); + captured_value = js_string(vm, capture_as_utf16_string); // vi. Append capture to indices. indices.append(Match::create(capture)); // vii. Append capturedValue to the end of capturedValues. - captured_values.append(capture.view.to_string()); + captured_values.append(capture_as_utf16_string); } // d. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(i)), capturedValue).