diff --git a/Tests/LibRegex/Regex.cpp b/Tests/LibRegex/Regex.cpp
index 1716462c06..d55e70afa0 100644
--- a/Tests/LibRegex/Regex.cpp
+++ b/Tests/LibRegex/Regex.cpp
@@ -593,6 +593,12 @@ TEST_CASE(ECMA262_parse)
{ "a{9007199254740992,9007199254740992}"sv, regex::Error::InvalidBraceContent },
{ "(?a)(?b)"sv, regex::Error::DuplicateNamedCapture },
{ "(?a)(?b)(?c)"sv, regex::Error::DuplicateNamedCapture },
+ { "(?<1a>a)"sv, regex::Error::InvalidNameForCaptureGroup },
+ { "(?<\\a>a)"sv, regex::Error::InvalidNameForCaptureGroup },
+ { "(?<\ta>a)"sv, regex::Error::InvalidNameForCaptureGroup },
+ { "(?<$$_$$>a)"sv },
+ { "(?<ΓΏ>a)"sv },
+ { "(?<ππ»πΈππ·>a)"sv },
};
for (auto& test : tests) {
diff --git a/Userland/Libraries/LibRegex/RegexParser.cpp b/Userland/Libraries/LibRegex/RegexParser.cpp
index 3642c93b48..8f6624e539 100644
--- a/Userland/Libraries/LibRegex/RegexParser.cpp
+++ b/Userland/Libraries/LibRegex/RegexParser.cpp
@@ -2036,29 +2036,109 @@ bool ECMA262Parser::parse_unicode_property_escape(PropertyEscape& property, bool
FlyString ECMA262Parser::read_capture_group_specifier(bool take_starting_angle_bracket)
{
+ static auto id_start_category = Unicode::property_from_string("ID_Start"sv);
+ static auto id_continue_category = Unicode::property_from_string("ID_Continue"sv);
+ static constexpr const u32 REPLACEMENT_CHARACTER = 0xFFFD;
+ constexpr const u32 ZERO_WIDTH_NON_JOINER { 0x200C };
+ constexpr const u32 ZERO_WIDTH_JOINER { 0x200D };
+
if (take_starting_angle_bracket && !consume("<"))
return {};
StringBuilder builder;
- while (match(TokenType::Char) || match(TokenType::Dollar) || match(TokenType::LeftCurly) || match(TokenType::RightCurly)) {
- auto c = m_parser_state.current_token.value();
- if (c == ">")
- break;
- if (try_skip("\\u"sv)) {
- if (auto code_point = consume_escaped_code_point(true); code_point.has_value()) {
- builder.append_code_point(*code_point);
+ auto consume_code_point = [&] {
+ Utf8View utf_8_view { m_parser_state.lexer.source().substring_view(m_parser_state.lexer.tell() - 1) };
+ if (utf_8_view.is_empty())
+ return REPLACEMENT_CHARACTER;
+ u32 code_point = *utf_8_view.begin();
+ auto characters = utf_8_view.byte_offset_of(1);
+
+ while (characters-- > 0)
+ consume();
+
+ return code_point;
+ };
+
+ {
+ // The first character is limited to: https://tc39.es/ecma262/#prod-RegExpIdentifierStart
+ // RegExpIdentifierStart[UnicodeMode] ::
+ // IdentifierStartChar
+ // \ RegExpUnicodeEscapeSequence[+UnicodeMode]
+ // [~UnicodeMode] UnicodeLeadSurrogate UnicodeTrailSurrogate
+
+ auto code_point = consume_code_point();
+
+ if (code_point == '\\' && match('u')) {
+ consume();
+
+ if (auto maybe_code_point = consume_escaped_code_point(true); maybe_code_point.has_value()) {
+ code_point = *maybe_code_point;
} else {
set_error(Error::InvalidNameForCaptureGroup);
return {};
}
- } else {
- builder.append(consume().value());
}
+
+ if (is_ascii(code_point)) {
+ // The only valid ID_Start unicode characters in ascii are the letters.
+ if (!is_ascii_alpha(code_point) && code_point != '$' && code_point != '_') {
+ set_error(Error::InvalidNameForCaptureGroup);
+ return {};
+ }
+ } else if (id_start_category.has_value() && !Unicode::code_point_has_property(code_point, *id_start_category)) {
+ set_error(Error::InvalidNameForCaptureGroup);
+ return {};
+ }
+ builder.append_code_point(code_point);
+ }
+
+ bool hit_end = false;
+
+ // Any following characters are limited to:
+ // RegExpIdentifierPart[UnicodeMode] ::
+ // IdentifierPartChar
+ // \ RegExpUnicodeEscapeSequence[+UnicodeMode]
+ // [~UnicodeMode] UnicodeLeadSurrogate UnicodeTrailSurrogate
+
+ while (match(TokenType::Char) || match(TokenType::Dollar) || match(TokenType::LeftCurly) || match(TokenType::RightCurly)) {
+ auto code_point = consume_code_point();
+
+ if (code_point == '>') {
+ hit_end = true;
+ break;
+ }
+
+ if (code_point == '\\') {
+ if (!try_skip("u")) {
+ set_error(Error::InvalidNameForCaptureGroup);
+ return {};
+ }
+ if (auto maybe_code_point = consume_escaped_code_point(true); maybe_code_point.has_value()) {
+ code_point = *maybe_code_point;
+ } else {
+ set_error(Error::InvalidNameForCaptureGroup);
+ return {};
+ }
+ }
+
+ if (is_ascii(code_point)) {
+ // The only valid ID_Continue unicode characters in ascii are the letters and numbers.
+ if (!is_ascii_alphanumeric(code_point) && code_point != '$' && code_point != '_') {
+ set_error(Error::InvalidNameForCaptureGroup);
+ return {};
+ }
+ } else if (code_point != ZERO_WIDTH_JOINER && code_point != ZERO_WIDTH_NON_JOINER) {
+ if (id_continue_category.has_value() && !Unicode::code_point_has_property(code_point, *id_continue_category)) {
+ set_error(Error::InvalidNameForCaptureGroup);
+ return {};
+ }
+ }
+ builder.append_code_point(code_point);
}
FlyString name = builder.build();
- if (!consume(">") || name.is_empty())
+ if (!hit_end || name.is_empty())
set_error(Error::InvalidNameForCaptureGroup);
return name;