mirror of
https://github.com/RGBCube/serenity
synced 2025-07-25 13:57:35 +00:00
LibJS: Avoid expensive UTF-8/16 conversion in legacy RegExp properties
Let's not incur the cost of a synchronous conversion to UTF-8 for all the legacy static properties after running a regular expression. The SunSpider subtest regexp-dna goes from taking ~25 sec to ~0.7 sec on my machine.
This commit is contained in:
parent
e1fb64abad
commit
f39b6ae3c6
3 changed files with 70 additions and 68 deletions
|
@ -29,7 +29,7 @@ void RegExpLegacyStaticProperties::invalidate()
|
|||
}
|
||||
|
||||
// GetLegacyRegExpStaticProperty( C, thisValue, internalSlotName ), https://github.com/tc39/proposal-regexp-legacy-features#getlegacyregexpstaticproperty-c-thisvalue-internalslotname-
|
||||
ThrowCompletionOr<Value> get_legacy_regexp_static_property(VM& vm, RegExpConstructor& constructor, Value this_value, Optional<String> const& (RegExpLegacyStaticProperties::*property_getter)() const)
|
||||
ThrowCompletionOr<Value> get_legacy_regexp_static_property(VM& vm, RegExpConstructor& constructor, Value this_value, Optional<Utf16String> const& (RegExpLegacyStaticProperties::*property_getter)() const)
|
||||
{
|
||||
// 1. Assert C is an object that has an internal slot named internalSlotName.
|
||||
|
||||
|
@ -49,7 +49,7 @@ ThrowCompletionOr<Value> get_legacy_regexp_static_property(VM& vm, RegExpConstru
|
|||
}
|
||||
|
||||
// SetLegacyRegExpStaticProperty( C, thisValue, internalSlotName, val ), https://github.com/tc39/proposal-regexp-legacy-features#setlegacyregexpstaticproperty-c-thisvalue-internalslotname-val-
|
||||
ThrowCompletionOr<void> set_legacy_regexp_static_property(VM& vm, RegExpConstructor& constructor, Value this_value, void (RegExpLegacyStaticProperties::*property_setter)(String), Value value)
|
||||
ThrowCompletionOr<void> set_legacy_regexp_static_property(VM& vm, RegExpConstructor& constructor, Value this_value, void (RegExpLegacyStaticProperties::*property_setter)(Utf16String), Value value)
|
||||
{
|
||||
// 1. Assert C is an object that has an internal slot named internalSlotName.
|
||||
|
||||
|
@ -58,7 +58,7 @@ ThrowCompletionOr<void> set_legacy_regexp_static_property(VM& vm, RegExpConstruc
|
|||
return vm.throw_completion<TypeError>(ErrorType::SetLegacyRegExpStaticPropertyThisValueMismatch);
|
||||
|
||||
// 3. Let strVal be ? ToString(val).
|
||||
auto str_value = TRY(value.to_string(vm));
|
||||
auto str_value = TRY(value.to_utf16_string(vm));
|
||||
|
||||
// 4. Set the value of the internal slot of C named internalSlotName to strVal.
|
||||
(constructor.legacy_static_properties().*property_setter)(str_value);
|
||||
|
@ -67,7 +67,7 @@ ThrowCompletionOr<void> set_legacy_regexp_static_property(VM& vm, RegExpConstruc
|
|||
}
|
||||
|
||||
// UpdateLegacyRegExpStaticProperties ( C, S, startIndex, endIndex, capturedValues ), https://github.com/tc39/proposal-regexp-legacy-features#updatelegacyregexpstaticproperties--c-s-startindex-endindex-capturedvalues-
|
||||
void update_legacy_regexp_static_properties(RegExpConstructor& constructor, Utf16String const& string, size_t start_index, size_t end_index, Vector<String> const& captured_values)
|
||||
void update_legacy_regexp_static_properties(RegExpConstructor& constructor, Utf16String const& string, size_t start_index, size_t end_index, Vector<Utf16String> const& captured_values)
|
||||
{
|
||||
auto& legacy_static_properties = constructor.legacy_static_properties();
|
||||
|
||||
|
@ -87,11 +87,11 @@ void update_legacy_regexp_static_properties(RegExpConstructor& constructor, Utf1
|
|||
auto group_count = captured_values.size();
|
||||
|
||||
// 7. Set the value of C’s [[RegExpInput]] internal slot to S.
|
||||
legacy_static_properties.set_input(string.to_utf8());
|
||||
legacy_static_properties.set_input(string);
|
||||
|
||||
// 8. Set the value of C’s [[RegExpLastMatch]] internal slot to a String whose length is endIndex - startIndex and containing the code units from S with indices startIndex through endIndex - 1, in ascending order.
|
||||
auto last_match = string.view().substring_view(start_index, end_index - start_index);
|
||||
legacy_static_properties.set_last_match(last_match.to_utf8());
|
||||
legacy_static_properties.set_last_match(Utf16String(last_match));
|
||||
|
||||
// 9. If n > 0, set the value of C’s [[RegExpLastParen]] internal slot to the last element of capturedValues.
|
||||
if (group_count > 0) {
|
||||
|
@ -100,20 +100,20 @@ void update_legacy_regexp_static_properties(RegExpConstructor& constructor, Utf1
|
|||
}
|
||||
// 10. Else, set the value of C’s [[RegExpLastParen]] internal slot to the empty String.
|
||||
else {
|
||||
legacy_static_properties.set_last_paren(String::empty());
|
||||
legacy_static_properties.set_last_paren(Utf16String(""sv));
|
||||
}
|
||||
|
||||
// 11. Set the value of C’s [[RegExpLeftContext]] internal slot to a String whose length is startIndex and containing the code units from S with indices 0 through startIndex - 1, in ascending order.
|
||||
auto left_context = string.view().substring_view(0, start_index);
|
||||
legacy_static_properties.set_left_context(left_context.to_utf8());
|
||||
legacy_static_properties.set_left_context(Utf16String(left_context));
|
||||
|
||||
// 12. Set the value of C’s [[RegExpRightContext]] internal slot to a String whose length is len - endIndex and containing the code units from S with indices endIndex through len - 1, in ascending order.
|
||||
auto right_context = string.view().substring_view(end_index, len - end_index);
|
||||
legacy_static_properties.set_right_context(right_context.to_utf8());
|
||||
legacy_static_properties.set_right_context(Utf16String(right_context));
|
||||
|
||||
// 13. For each integer i such that 1 ≤ i ≤ 9
|
||||
for (size_t i = 1; i <= 9; i++) {
|
||||
auto value = String::empty();
|
||||
auto value = Utf16String(""sv);
|
||||
// If i ≤ n, set the value of C’s [[RegExpPareni]] internal slot to the ith element of capturedValues.
|
||||
if (i <= group_count) {
|
||||
value = captured_values[i - 1];
|
||||
|
@ -124,23 +124,23 @@ void update_legacy_regexp_static_properties(RegExpConstructor& constructor, Utf1
|
|||
}
|
||||
|
||||
if (i == 1) {
|
||||
legacy_static_properties.set_$1(value);
|
||||
legacy_static_properties.set_$1(Utf16String(value));
|
||||
} else if (i == 2) {
|
||||
legacy_static_properties.set_$2(value);
|
||||
legacy_static_properties.set_$2(Utf16String(value));
|
||||
} else if (i == 3) {
|
||||
legacy_static_properties.set_$3(value);
|
||||
legacy_static_properties.set_$3(Utf16String(value));
|
||||
} else if (i == 4) {
|
||||
legacy_static_properties.set_$4(value);
|
||||
legacy_static_properties.set_$4(Utf16String(value));
|
||||
} else if (i == 5) {
|
||||
legacy_static_properties.set_$5(value);
|
||||
legacy_static_properties.set_$5(Utf16String(value));
|
||||
} else if (i == 6) {
|
||||
legacy_static_properties.set_$6(value);
|
||||
legacy_static_properties.set_$6(Utf16String(value));
|
||||
} else if (i == 7) {
|
||||
legacy_static_properties.set_$7(value);
|
||||
legacy_static_properties.set_$7(Utf16String(value));
|
||||
} else if (i == 8) {
|
||||
legacy_static_properties.set_$8(value);
|
||||
legacy_static_properties.set_$8(Utf16String(value));
|
||||
} else if (i == 9) {
|
||||
legacy_static_properties.set_$9(value);
|
||||
legacy_static_properties.set_$9(Utf16String(value));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include <AK/Optional.h>
|
||||
#include <AK/String.h>
|
||||
#include <LibJS/Forward.h>
|
||||
#include <LibJS/Runtime/Utf16String.h>
|
||||
|
||||
namespace JS {
|
||||
|
||||
|
@ -22,57 +23,57 @@ namespace JS {
|
|||
// [[RegExpParen1]] ... [[RegExpParen9]]
|
||||
class RegExpLegacyStaticProperties {
|
||||
public:
|
||||
Optional<String> const& input() const { return m_input; }
|
||||
Optional<String> const& last_match() const { return m_last_match; }
|
||||
Optional<String> const& last_paren() const { return m_last_paren; }
|
||||
Optional<String> const& left_context() const { return m_left_context; }
|
||||
Optional<String> const& right_context() const { return m_right_context; }
|
||||
Optional<String> const& $1() const { return m_$1; }
|
||||
Optional<String> const& $2() const { return m_$2; }
|
||||
Optional<String> const& $3() const { return m_$3; }
|
||||
Optional<String> const& $4() const { return m_$4; }
|
||||
Optional<String> const& $5() const { return m_$5; }
|
||||
Optional<String> const& $6() const { return m_$6; }
|
||||
Optional<String> const& $7() const { return m_$7; }
|
||||
Optional<String> const& $8() const { return m_$8; }
|
||||
Optional<String> const& $9() const { return m_$9; }
|
||||
Optional<Utf16String> const& input() const { return m_input; }
|
||||
Optional<Utf16String> const& last_match() const { return m_last_match; }
|
||||
Optional<Utf16String> const& last_paren() const { return m_last_paren; }
|
||||
Optional<Utf16String> const& left_context() const { return m_left_context; }
|
||||
Optional<Utf16String> const& right_context() const { return m_right_context; }
|
||||
Optional<Utf16String> const& $1() const { return m_$1; }
|
||||
Optional<Utf16String> const& $2() const { return m_$2; }
|
||||
Optional<Utf16String> const& $3() const { return m_$3; }
|
||||
Optional<Utf16String> const& $4() const { return m_$4; }
|
||||
Optional<Utf16String> const& $5() const { return m_$5; }
|
||||
Optional<Utf16String> const& $6() const { return m_$6; }
|
||||
Optional<Utf16String> const& $7() const { return m_$7; }
|
||||
Optional<Utf16String> const& $8() const { return m_$8; }
|
||||
Optional<Utf16String> const& $9() const { return m_$9; }
|
||||
|
||||
void set_input(String input) { m_input = move(input); }
|
||||
void set_last_match(String last_match) { m_last_match = move(last_match); }
|
||||
void set_last_paren(String last_paren) { m_last_paren = move(last_paren); }
|
||||
void set_left_context(String left_context) { m_left_context = move(left_context); }
|
||||
void set_right_context(String right_context) { m_right_context = move(right_context); }
|
||||
void set_$1(String value) { m_$1 = move(value); }
|
||||
void set_$2(String value) { m_$2 = move(value); }
|
||||
void set_$3(String value) { m_$3 = move(value); }
|
||||
void set_$4(String value) { m_$4 = move(value); }
|
||||
void set_$5(String value) { m_$5 = move(value); }
|
||||
void set_$6(String value) { m_$6 = move(value); }
|
||||
void set_$7(String value) { m_$7 = move(value); }
|
||||
void set_$8(String value) { m_$8 = move(value); }
|
||||
void set_$9(String value) { m_$9 = move(value); }
|
||||
void set_input(Utf16String input) { m_input = move(input); }
|
||||
void set_last_match(Utf16String last_match) { m_last_match = move(last_match); }
|
||||
void set_last_paren(Utf16String last_paren) { m_last_paren = move(last_paren); }
|
||||
void set_left_context(Utf16String left_context) { m_left_context = move(left_context); }
|
||||
void set_right_context(Utf16String right_context) { m_right_context = move(right_context); }
|
||||
void set_$1(Utf16String value) { m_$1 = move(value); }
|
||||
void set_$2(Utf16String value) { m_$2 = move(value); }
|
||||
void set_$3(Utf16String value) { m_$3 = move(value); }
|
||||
void set_$4(Utf16String value) { m_$4 = move(value); }
|
||||
void set_$5(Utf16String value) { m_$5 = move(value); }
|
||||
void set_$6(Utf16String value) { m_$6 = move(value); }
|
||||
void set_$7(Utf16String value) { m_$7 = move(value); }
|
||||
void set_$8(Utf16String value) { m_$8 = move(value); }
|
||||
void set_$9(Utf16String value) { m_$9 = move(value); }
|
||||
void invalidate();
|
||||
|
||||
private:
|
||||
Optional<String> m_input;
|
||||
Optional<String> m_last_match;
|
||||
Optional<String> m_last_paren;
|
||||
Optional<String> m_left_context;
|
||||
Optional<String> m_right_context;
|
||||
Optional<String> m_$1;
|
||||
Optional<String> m_$2;
|
||||
Optional<String> m_$3;
|
||||
Optional<String> m_$4;
|
||||
Optional<String> m_$5;
|
||||
Optional<String> m_$6;
|
||||
Optional<String> m_$7;
|
||||
Optional<String> m_$8;
|
||||
Optional<String> m_$9;
|
||||
Optional<Utf16String> m_input;
|
||||
Optional<Utf16String> m_last_match;
|
||||
Optional<Utf16String> m_last_paren;
|
||||
Optional<Utf16String> m_left_context;
|
||||
Optional<Utf16String> m_right_context;
|
||||
Optional<Utf16String> m_$1;
|
||||
Optional<Utf16String> m_$2;
|
||||
Optional<Utf16String> m_$3;
|
||||
Optional<Utf16String> m_$4;
|
||||
Optional<Utf16String> m_$5;
|
||||
Optional<Utf16String> m_$6;
|
||||
Optional<Utf16String> m_$7;
|
||||
Optional<Utf16String> m_$8;
|
||||
Optional<Utf16String> m_$9;
|
||||
};
|
||||
|
||||
ThrowCompletionOr<void> set_legacy_regexp_static_property(VM& vm, RegExpConstructor& constructor, Value this_value, void (RegExpLegacyStaticProperties::*property_setter)(String), Value value);
|
||||
ThrowCompletionOr<Value> get_legacy_regexp_static_property(VM& vm, RegExpConstructor& constructor, Value this_value, Optional<String> const& (RegExpLegacyStaticProperties::*property_getter)() const);
|
||||
void update_legacy_regexp_static_properties(RegExpConstructor& constructor, Utf16String const& string, size_t start_index, size_t end_index, Vector<String> const& captured_values);
|
||||
ThrowCompletionOr<void> set_legacy_regexp_static_property(VM& vm, RegExpConstructor& constructor, Value this_value, void (RegExpLegacyStaticProperties::*property_setter)(Utf16String), Value value);
|
||||
ThrowCompletionOr<Value> get_legacy_regexp_static_property(VM& vm, RegExpConstructor& constructor, Value this_value, Optional<Utf16String> const& (RegExpLegacyStaticProperties::*property_getter)() const);
|
||||
void update_legacy_regexp_static_properties(RegExpConstructor& constructor, Utf16String const& string, size_t start_index, size_t end_index, Vector<Utf16String> const& captured_values);
|
||||
void invalidate_legacy_regexp_static_properties(RegExpConstructor& constructor);
|
||||
|
||||
}
|
||||
|
|
|
@ -265,7 +265,7 @@ static ThrowCompletionOr<Value> regexp_builtin_exec(VM& vm, RegExpObject& regexp
|
|||
|
||||
// 24. Let indices be a new empty List.
|
||||
Vector<Optional<Match>> indices;
|
||||
Vector<String> captured_values;
|
||||
Vector<Utf16String> captured_values;
|
||||
|
||||
// 25. Let groupNames be a new empty List.
|
||||
HashMap<FlyString, Match> group_names;
|
||||
|
@ -300,7 +300,7 @@ static ThrowCompletionOr<Value> regexp_builtin_exec(VM& vm, RegExpObject& regexp
|
|||
// ii. Append undefined to indices.
|
||||
indices.append({});
|
||||
// iii. Append capture to indices.
|
||||
captured_values.append(String::empty());
|
||||
captured_values.append(Utf16String(""sv));
|
||||
}
|
||||
// c. Else,
|
||||
else {
|
||||
|
@ -311,11 +311,12 @@ static ThrowCompletionOr<Value> regexp_builtin_exec(VM& vm, RegExpObject& regexp
|
|||
// 2. Set captureEnd to ! GetStringIndex(S, Input, captureEnd).
|
||||
// iv. Let capture be the Match { [[StartIndex]]: captureStart, [[EndIndex]]: captureEnd }.
|
||||
// v. Let capturedValue be ! GetMatchString(S, capture).
|
||||
captured_value = js_string(vm, capture.view.u16_view());
|
||||
auto capture_as_utf16_string = Utf16String(capture.view.u16_view());
|
||||
captured_value = js_string(vm, capture_as_utf16_string);
|
||||
// vi. Append capture to indices.
|
||||
indices.append(Match::create(capture));
|
||||
// vii. Append capturedValue to the end of capturedValues.
|
||||
captured_values.append(capture.view.to_string());
|
||||
captured_values.append(capture_as_utf16_string);
|
||||
}
|
||||
|
||||
// d. Perform ! CreateDataPropertyOrThrow(A, ! ToString(𝔽(i)), capturedValue).
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue