1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-25 03:57:43 +00:00

LibRegex: Support property escapes of the form \p{Type=Value}

Until now, only binary properties could be parsed. Non-binary properties
are of the form "Type=Value", where "Type" may be General_Category,
Script, or Script_Extensions (or their aliases). Of these, LibUnicode
currently supports only General_Category, so that is the only type
LibRegex can parse.
This commit is contained in:
Timothy Flynn 2021-07-31 18:06:53 -04:00 committed by Ali Mohammad Pur
parent 011514a384
commit 4de4312827
2 changed files with 40 additions and 11 deletions

View file

@ -1976,21 +1976,44 @@ Optional<ECMA262Parser::PropertyEscape> ECMA262Parser::read_unicode_property_esc
{
consume(TokenType::LeftCurly, Error::InvalidPattern);
auto start_token = m_parser_state.current_token;
size_t offset = 0;
while (match(TokenType::Char)) {
if (m_parser_state.current_token.value() == "}")
break;
offset += consume().value().length();
// Note: clang-format is disabled here because it doesn't handle templated lambdas yet.
// clang-format off
auto read_until = [&]<typename... Ts>(Ts&&... terminators) {
auto start_token = m_parser_state.current_token;
size_t offset = 0;
while (match(TokenType::Char)) {
if (m_parser_state.current_token.value().is_one_of(forward<Ts>(terminators)...))
break;
offset += consume().value().length();
}
return StringView { start_token.value().characters_without_null_termination(), offset };
};
// clang-format on
StringView property_type;
StringView property_name = read_until("="sv, "}"sv);
if (try_skip("="sv)) {
if (property_name.is_empty())
return {};
property_type = property_name;
property_name = read_until("}"sv);
}
StringView property_name { start_token.value().characters_without_null_termination(), offset };
consume(TokenType::RightCurly, Error::InvalidPattern);
if (auto property = Unicode::property_from_string(property_name); property.has_value())
return { *property };
if (auto general_category = Unicode::general_category_from_string(property_name); general_category.has_value())
return { *general_category };
if (property_type.is_empty()) {
if (auto property = Unicode::property_from_string(property_name); property.has_value())
return { *property };
if (auto general_category = Unicode::general_category_from_string(property_name); general_category.has_value())
return { *general_category };
} else if ((property_type == "General_Category"sv) || (property_type == "gc"sv)) {
if (auto general_category = Unicode::general_category_from_string(property_name); general_category.has_value())
return { *general_category };
}
return {};
}