diff --git a/Tests/LibRegex/Regex.cpp b/Tests/LibRegex/Regex.cpp index be2b026fd0..5203a277a0 100644 --- a/Tests/LibRegex/Regex.cpp +++ b/Tests/LibRegex/Regex.cpp @@ -485,6 +485,18 @@ TEST_CASE(simple_period_end_benchmark) EXPECT_EQ(re.search("hello?", m), true); } +TEST_CASE(posix_extended_nested_capture_group) +{ + Regex<PosixExtended> re("(h(e(?<llo>llo)))"); // group 0 -> "hello", group 1 -> "ello", group 2/"llo" -> "llo" + auto result = re.match("hello"); + EXPECT(result.success); + EXPECT_EQ(result.capture_group_matches.size(), 1u); + EXPECT_EQ(result.capture_group_matches[0].size(), 3u); + EXPECT_EQ(result.capture_group_matches[0][0].view, "hello"sv); + EXPECT_EQ(result.capture_group_matches[0][1].view, "ello"sv); + EXPECT_EQ(result.capture_group_matches[0][2].view, "llo"sv); +} + TEST_CASE(ECMA262_parse) { struct _test { diff --git a/Userland/Libraries/LibRegex/RegexParser.cpp b/Userland/Libraries/LibRegex/RegexParser.cpp index 6f95c3d678..8fa9dd9b18 100644 --- a/Userland/Libraries/LibRegex/RegexParser.cpp +++ b/Userland/Libraries/LibRegex/RegexParser.cpp @@ -799,6 +799,7 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_sub_expression(ByteCode& stack, si last_token = consume(); } capture_group_name = StringView(start_token.value().characters_without_null_termination(), capture_group_name_length); + ++m_parser_state.named_capture_groups_count; } else if (match(TokenType::EqualSign)) { // positive lookahead consume(); @@ -817,8 +818,11 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_sub_expression(ByteCode& stack, si } } - if (!(m_parser_state.regex_options & AllFlags::SkipSubExprResults || prevent_capture_group)) - bytecode.insert_bytecode_group_capture_left(m_parser_state.capture_groups_count); + auto current_capture_group = m_parser_state.capture_groups_count; + if (!(m_parser_state.regex_options & AllFlags::SkipSubExprResults || prevent_capture_group)) { + bytecode.insert_bytecode_group_capture_left(current_capture_group); + m_parser_state.capture_groups_count++; + } 
ByteCode capture_group_bytecode; @@ -846,13 +850,10 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_sub_expression(ByteCode& stack, si consume(TokenType::RightParen, Error::MismatchingParen); if (!(m_parser_state.regex_options & AllFlags::SkipSubExprResults || prevent_capture_group)) { - if (capture_group_name.has_value()) { - bytecode.insert_bytecode_group_capture_right(m_parser_state.capture_groups_count, capture_group_name.value()); - ++m_parser_state.named_capture_groups_count; - } else { - bytecode.insert_bytecode_group_capture_right(m_parser_state.capture_groups_count); - } - ++m_parser_state.capture_groups_count; + if (capture_group_name.has_value()) + bytecode.insert_bytecode_group_capture_right(current_capture_group, capture_group_name.value()); + else + bytecode.insert_bytecode_group_capture_right(current_capture_group); } should_parse_repetition_symbol = true; break;