1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-27 16:57:46 +00:00

LibRegex: Allow Unicode escape sequences in capture group names

Unfortunately, this requires a slight divergence in the way the capture
group names are stored. Previously, the generated byte code would simply
store a view into the regex pattern string, so no string copying was
required.

Now, the escape sequences are decoded into a new string, and all parsed
capture group names are stored in a vector in the parser result
structure. The byte code then stores a view into the corresponding
string in that vector.
This commit is contained in:
Timothy Flynn 2021-08-18 17:17:18 -04:00 committed by Andreas Kling
parent 6131c0485e
commit 4f2cbe119b
4 changed files with 38 additions and 9 deletions

View file

@ -53,6 +53,7 @@ public:
size_t match_length_minimum;
Error error;
Token error_token;
Vector<FlyString> capture_groups;
};
explicit Parser(Lexer& lexer)
@ -218,7 +219,7 @@ private:
};
StringView read_digits_as_string(ReadDigitsInitialZeroState initial_zero = ReadDigitsInitialZeroState::Allow, bool hex = false, int max_count = -1, int min_count = -1);
Optional<unsigned> read_digits(ReadDigitsInitialZeroState initial_zero = ReadDigitsInitialZeroState::Allow, bool hex = false, int max_count = -1, int min_count = -1);
StringView read_capture_group_specifier(bool take_starting_angle_bracket = false);
FlyString read_capture_group_specifier(bool take_starting_angle_bracket = false);
struct Script {
Unicode::Script script {};