mirror of
https://github.com/RGBCube/serenity
synced 2025-07-26 12:17:35 +00:00
LibRegex: Implement and use a REPEAT operation for bytecode repetition
Currently, when we need to repeat an instruction N times, we simply add that instruction N times in a for-loop. This doesn't scale well to extremely large values of N, and ECMA-262 allows up to N = 2^53 - 1. Instead, add a new REPEAT bytecode operation to defer this loop from the parser to the runtime executor. This allows the parser to complete without any loops (for this instruction), and allows the executor to bail early if the repeated bytecode fails. Note: The templated ByteCode methods exist to allow the POSIX parsers to continue using u32, because they are limited to N = 2^20.
This commit is contained in:
parent
a0b72f5ad3
commit
9509433e25
7 changed files with 103 additions and 16 deletions
|
@@ -102,6 +102,7 @@ protected:
|
|||
size_t capture_groups_count { 0 };
|
||||
size_t named_capture_groups_count { 0 };
|
||||
size_t match_length_minimum { 0 };
|
||||
size_t repetition_mark_count { 0 };
|
||||
AllOptions regex_options;
|
||||
HashMap<int, size_t> capture_group_minimum_lengths;
|
||||
HashMap<FlyString, NamedCaptureGroup> named_capture_groups;
|
||||
|
@@ -232,7 +233,7 @@ private:
|
|||
bool parse_assertion(ByteCode&, size_t&, bool unicode, bool named);
|
||||
bool parse_atom(ByteCode&, size_t&, bool unicode, bool named);
|
||||
bool parse_quantifier(ByteCode&, size_t&, bool unicode, bool named);
|
||||
bool parse_interval_quantifier(Optional<size_t>& repeat_min, Optional<size_t>& repeat_max);
|
||||
bool parse_interval_quantifier(Optional<u64>& repeat_min, Optional<u64>& repeat_max);
|
||||
bool parse_atom_escape(ByteCode&, size_t&, bool unicode, bool named);
|
||||
bool parse_character_class(ByteCode&, size_t&, bool unicode, bool named);
|
||||
bool parse_capture_group(ByteCode&, size_t&, bool unicode, bool named);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue