1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-27 06:27:45 +00:00

LibJS+LibRegex: Don't repeat regex match in regexp_exec()

LibRegex already implements this loop in a more performant way, so all
LibJS has to do here is to return things in the right shape, and not
loop over the input string.
Previously this was a quadratic operation on string length, which led
to crazy execution times on failing regexps - now it's nice and fast :^)

Note that a Regex test has to be updated to remove the stateful flag as
it repeats matching on multiple strings.
This commit is contained in:
Ali Mohammad Pur 2022-02-04 23:56:44 +03:30 committed by Andreas Kling
parent 2b028f6faa
commit a962ee020a
3 changed files with 29 additions and 45 deletions

View file

@ -984,7 +984,9 @@ TEST_CASE(negative_lookahead)
{ {
{ {
// Negative lookahead with more than 2 forks difference between lookahead init and finish. // Negative lookahead with more than 2 forks difference between lookahead init and finish.
Regex<ECMA262> re(":(?!\\^\\)|1)", ECMAScriptFlags::Global); auto options = ECMAScriptOptions { ECMAScriptFlags::Global };
options.reset_flag((ECMAScriptFlags)regex::AllFlags::Internal_Stateful);
Regex<ECMA262> re(":(?!\\^\\)|1)", options);
EXPECT_EQ(re.match(":^)").success, false); EXPECT_EQ(re.match(":^)").success, false);
EXPECT_EQ(re.match(":1").success, false); EXPECT_EQ(re.match(":1").success, false);
EXPECT_EQ(re.match(":foobar").success, true); EXPECT_EQ(re.match(":foobar").success, true);

View file

@ -208,48 +208,38 @@ static ThrowCompletionOr<Value> regexp_builtin_exec(GlobalObject& global_object,
RegexResult result; RegexResult result;
// NOTE: For optimisation purposes, this whole loop is implemented in LibRegex.
// 12. Let matchSucceeded be false. // 12. Let matchSucceeded be false.
// 13. Let Input be a List consisting of all of the characters, in order, of S. If fullUnicode is true, each character is a code unit, otherwise each character is a code point. // 13. Let Input be a List consisting of all of the characters, in order, of S. If fullUnicode is true, each character is a code unit, otherwise each character is a code point.
// 14. Repeat, while matchSucceeded is false, // 14. Repeat, while matchSucceeded is false
while (true) {
// a. If lastIndex > length, then // a. If lastIndex > length, then
if (last_index > string.length_in_code_units()) {
// i. If global is true or sticky is true, then // i. If global is true or sticky is true, then
if (global || sticky) { // 1. Perform ? Set(R, "lastIndex", 0, true).
// 1. Perform ? Set(R, "lastIndex", +0𝔽, true).
TRY(regexp_object.set(vm.names.lastIndex, Value(0), Object::ShouldThrowExceptions::Yes));
}
// ii. Return null. // ii. Return null.
return js_null();
}
// b. Let r be matcher(Input, lastIndex). // b. Let r be matcher(Input, lastIndex).
// c. If r is failure, then
// i. If sticky is true, then
// 1. Perform ? Set(R, "lastIndex", 0, true).
// 2. Return null.
// ii. Set lastIndex to AdvanceStringIndex(S, lastIndex, fullUnicode).
// d. Else,
// i. Assert: r is a State.
// ii. Set matchSucceeded to true.
// 14.b
regex.start_offset = full_unicode ? string.view().code_point_offset_of(last_index) : last_index; regex.start_offset = full_unicode ? string.view().code_point_offset_of(last_index) : last_index;
result = regex.match(string.view()); result = regex.match(string.view());
// c. If r is failure, then // 14.c and 14.a
if (!result.success) { if (!result.success) {
// i. If sticky is true, then // 14.c.i, 14.a.i
if (sticky) { if (sticky || global)
// 1. Perform ? Set(R, "lastIndex", +0𝔽, true).
TRY(regexp_object.set(vm.names.lastIndex, Value(0), Object::ShouldThrowExceptions::Yes)); TRY(regexp_object.set(vm.names.lastIndex, Value(0), Object::ShouldThrowExceptions::Yes));
// 2. Return null. // 14.a.ii, 14.c.i.2
return js_null(); return js_null();
} }
// ii. Set lastIndex to AdvanceStringIndex(S, lastIndex, fullUnicode).
last_index = advance_string_index(string.view(), last_index, full_unicode);
}
// d. Else,
else {
// i. Assert: r is a State.
// ii. Set matchSucceeded to true.
break;
}
}
auto& match = result.matches[0]; auto& match = result.matches[0];
auto match_index = match.global_offset; auto match_index = match.global_offset;

View file

@ -180,8 +180,6 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
#endif #endif
bool continue_search = input.regex_options.has_flag_set(AllFlags::Global) || input.regex_options.has_flag_set(AllFlags::Multiline); bool continue_search = input.regex_options.has_flag_set(AllFlags::Global) || input.regex_options.has_flag_set(AllFlags::Multiline);
if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful))
continue_search = false;
auto single_match_only = input.regex_options.has_flag_set(AllFlags::SingleMatch); auto single_match_only = input.regex_options.has_flag_set(AllFlags::SingleMatch);
@ -282,11 +280,7 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
break; break;
continue; continue;
} }
if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful)) { if (state.string_position < view_length && !input.regex_options.has_flag_set(AllFlags::Internal_Stateful)) {
append_match(input, state, view_index);
break;
}
if (state.string_position < view_length) {
return { false, 0, {}, {}, {}, operations }; return { false, 0, {}, {}, {}, operations };
} }
@ -503,8 +497,6 @@ Optional<bool> Matcher<Parser>::execute(MatchInput const& input, MatchState& sta
return false; return false;
case ExecutionResult::Failed_ExecuteLowPrioForks: { case ExecutionResult::Failed_ExecuteLowPrioForks: {
if (states_to_try_next.is_empty()) { if (states_to_try_next.is_empty()) {
if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful))
return {};
return false; return false;
} }
state = states_to_try_next.take_last(); state = states_to_try_next.take_last();