mirror of
https://github.com/RGBCube/serenity
synced 2025-07-25 13:07:46 +00:00
LibRegex: Support non-ASCII whitespace characters when matching \s or \S
ECMA-262 defines \s as: "Return the CharSet containing all characters corresponding to a code point on the right-hand side of the WhiteSpace or LineTerminator productions." The LineTerminator production is simply U+000A, U+000D, U+2028, or U+2029; unfortunately, there isn't a Unicode property that covers just those code points. The WhiteSpace production is U+0009, U+000B, U+000C, U+FEFF, or any code point with the Space_Separator general category. If the Unicode generators are disabled, matching \s and \S will fall back to the ASCII space code points.
This commit is contained in:
parent
54845c4bf2
commit
2212aa2388
2 changed files with 22 additions and 1 deletion
|
@ -706,6 +706,13 @@ TEST_CASE(ECMA262_match)
|
|||
|
||||
TEST_CASE(ECMA262_unicode_match)
|
||||
{
|
||||
constexpr auto space_and_line_terminator_code_points = Array { 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x0020, 0x00A0, 0x1680, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x2028, 0x2029, 0x202F, 0x205F, 0x3000, 0xFEFF };
|
||||
|
||||
StringBuilder builder;
|
||||
for (u32 code_point : space_and_line_terminator_code_points)
|
||||
builder.append_code_point(code_point);
|
||||
auto space_and_line_terminators = builder.build();
|
||||
|
||||
struct _test {
|
||||
StringView pattern;
|
||||
StringView subject;
|
||||
|
@ -729,6 +736,8 @@ TEST_CASE(ECMA262_unicode_match)
|
|||
{ "(?<𝓑𝓻𝓸𝔀𝓷>brown)"sv, "brown"sv, true, ECMAScriptFlags::Unicode },
|
||||
{ "(?<\\u{1d4d1}\\u{1d4fb}\\u{1d4f8}\\u{1d500}\\u{1d4f7}>brown)"sv, "brown"sv, true, ECMAScriptFlags::Unicode },
|
||||
{ "(?<\\ud835\\udcd1\\ud835\\udcfb\\ud835\\udcf8\\ud835\\udd00\\ud835\\udcf7>brown)"sv, "brown"sv, true, ECMAScriptFlags::Unicode },
|
||||
{ "^\\s+$"sv, space_and_line_terminators },
|
||||
{ "^\\s+$"sv, space_and_line_terminators, true, ECMAScriptFlags::Unicode },
|
||||
};
|
||||
|
||||
for (auto& test : tests) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue