mirror of
https://github.com/RGBCube/serenity
synced 2025-07-27 03:27:34 +00:00
LibRegex: Allow Unicode escape sequences in capture group names
Unfortunately, this requires a slight divergence in the way the capture group names are stored. Previously, the generated byte code would simply store a view into the regex pattern string, so no string copying was required. Now, the escape sequences are decoded into a new string, and a vector of all parsed capture group names are stored in a vector in the parser result structure. The byte code then stores a view into the corresponding string in that vector.
This commit is contained in:
parent
6131c0485e
commit
4f2cbe119b
4 changed files with 38 additions and 9 deletions
|
@ -698,6 +698,9 @@ TEST_CASE(ECMA262_unicode_match)
|
|||
{ "\\ud83d\\ud83d"sv, "\xed\xa0\xbd\xed\xa0\xbd"sv, true, ECMAScriptFlags::Unicode },
|
||||
{ "(?<=.{3})f"sv, "abcdef"sv, true, ECMAScriptFlags::Unicode },
|
||||
{ "(?<=.{3})f"sv, "abc😀ef"sv, true, ECMAScriptFlags::Unicode },
|
||||
{ "(?<𝓑𝓻𝓸𝔀𝓷>brown)"sv, "brown"sv, true, ECMAScriptFlags::Unicode },
|
||||
{ "(?<\\u{1d4d1}\\u{1d4fb}\\u{1d4f8}\\u{1d500}\\u{1d4f7}>brown)"sv, "brown"sv, true, ECMAScriptFlags::Unicode },
|
||||
{ "(?<\\ud835\\udcd1\\ud835\\udcfb\\ud835\\udcf8\\ud835\\udd00\\ud835\\udcf7>brown)"sv, "brown"sv, true, ECMAScriptFlags::Unicode },
|
||||
};
|
||||
|
||||
for (auto& test : tests) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue