LibRegex: Allow Unicode escape sequences in capture group names

Unfortunately, this requires a slight divergence in the way the capture group names are stored. Previously, the generated byte code would simply store a view into the regex pattern string, so no string copying was required. Now, the escape sequences are decoded into a new string, and a vector of all parsed capture group names is stored in the parser result structure. The byte code then stores a view into the corresponding string in that vector.
2025-07-27 21:57:35 +00:00 · 2021-08-18 17:17:18 -04:00 · 2021-08-18 17:17:18 -04:00 · 4f2cbe119b
commit 4f2cbe119b
parent 6131c0485e
4 changed files with 38 additions and 9 deletions
--- a/Userland/Libraries/LibJS/Tests/builtins/String/String.prototype.match.js
+++ b/Userland/Libraries/LibJS/Tests/builtins/String/String.prototype.match.js
@@ -55,3 +55,18 @@ test("UTF-16", () => {
    expect("😀😀".match(/\ud83d/g)).toEqual(["\ud83d", "\ud83d"]);
    expect("😀😀".match(/\ude00/g)).toEqual(["\ude00", "\ude00"]);
 });
+
+test("escaped code points", () => {
+    var string = "The quick brown fox jumped over the lazy dog's back";
+
+    var re = /(?<𝓑𝓻𝓸𝔀𝓷>brown)/u;
+    expect(string.match(re).groups.𝓑𝓻𝓸𝔀𝓷).toBe("brown");
+
+    re = /(?<\u{1d4d1}\u{1d4fb}\u{1d4f8}\u{1d500}\u{1d4f7}>brown)/u;
+    expect(string.match(re).groups.𝓑𝓻𝓸𝔀𝓷).toBe("brown");
+    expect(string.match(re).groups.𝓑𝓻𝓸𝔀𝓷).toBe("brown");
+
+    re = /(?<\ud835\udcd1\ud835\udcfb\ud835\udcf8\ud835\udd00\ud835\udcf7>brown)/u;
+    expect(string.match(re).groups.𝓑𝓻𝓸𝔀𝓷).toBe("brown");
+    expect(string.match(re).groups.𝓑𝓻𝓸𝔀𝓷).toBe("brown");
+});