mirror of
https://github.com/RGBCube/serenity
synced 2025-07-26 02:17:34 +00:00
LibRegex: Support property escapes of the form \p{Type=Value}
Until now, only binary properties could be parsed. Non-binary properties are of the form "Type=Value", where "Type" may be General_Category, Script, or Script_Extensions (or their aliases). Of these, LibUnicode currently supports only General_Category, so LibRegex can parse only that type.
This commit is contained in:
parent
011514a384
commit
4de4312827
2 changed files with 40 additions and 11 deletions
|
@@ -667,6 +667,12 @@ TEST_CASE(ECMA262_property_match)
|
||||||
{ "\\p{Cased_Letter}", "a", true, ECMAScriptFlags::Unicode },
|
{ "\\p{Cased_Letter}", "a", true, ECMAScriptFlags::Unicode },
|
||||||
{ "\\p{Cased_Letter}", "A", true, ECMAScriptFlags::Unicode },
|
{ "\\p{Cased_Letter}", "A", true, ECMAScriptFlags::Unicode },
|
||||||
{ "\\p{Cased_Letter}", "9", false, ECMAScriptFlags::Unicode },
|
{ "\\p{Cased_Letter}", "9", false, ECMAScriptFlags::Unicode },
|
||||||
|
{ "\\p{General_Category=Cased_Letter}", "a", true, ECMAScriptFlags::Unicode },
|
||||||
|
{ "\\p{General_Category=Cased_Letter}", "A", true, ECMAScriptFlags::Unicode },
|
||||||
|
{ "\\p{General_Category=Cased_Letter}", "9", false, ECMAScriptFlags::Unicode },
|
||||||
|
{ "\\p{gc=Cased_Letter}", "a", true, ECMAScriptFlags::Unicode },
|
||||||
|
{ "\\p{gc=Cased_Letter}", "A", true, ECMAScriptFlags::Unicode },
|
||||||
|
{ "\\p{gc=Cased_Letter}", "9", false, ECMAScriptFlags::Unicode },
|
||||||
};
|
};
|
||||||
|
|
||||||
for (auto& test : tests) {
|
for (auto& test : tests) {
|
||||||
|
|
|
@@ -1976,21 +1976,44 @@ Optional<ECMA262Parser::PropertyEscape> ECMA262Parser::read_unicode_property_esc
|
||||||
{
|
{
|
||||||
consume(TokenType::LeftCurly, Error::InvalidPattern);
|
consume(TokenType::LeftCurly, Error::InvalidPattern);
|
||||||
|
|
||||||
auto start_token = m_parser_state.current_token;
|
// Note: clang-format is disabled here because it doesn't handle templated lambdas yet.
|
||||||
size_t offset = 0;
|
// clang-format off
|
||||||
while (match(TokenType::Char)) {
|
auto read_until = [&]<typename... Ts>(Ts&&... terminators) {
|
||||||
if (m_parser_state.current_token.value() == "}")
|
auto start_token = m_parser_state.current_token;
|
||||||
break;
|
size_t offset = 0;
|
||||||
offset += consume().value().length();
|
|
||||||
|
while (match(TokenType::Char)) {
|
||||||
|
if (m_parser_state.current_token.value().is_one_of(forward<Ts>(terminators)...))
|
||||||
|
break;
|
||||||
|
offset += consume().value().length();
|
||||||
|
}
|
||||||
|
|
||||||
|
return StringView { start_token.value().characters_without_null_termination(), offset };
|
||||||
|
};
|
||||||
|
// clang-format on
|
||||||
|
|
||||||
|
StringView property_type;
|
||||||
|
StringView property_name = read_until("="sv, "}"sv);
|
||||||
|
|
||||||
|
if (try_skip("="sv)) {
|
||||||
|
if (property_name.is_empty())
|
||||||
|
return {};
|
||||||
|
property_type = property_name;
|
||||||
|
property_name = read_until("}"sv);
|
||||||
}
|
}
|
||||||
|
|
||||||
StringView property_name { start_token.value().characters_without_null_termination(), offset };
|
|
||||||
consume(TokenType::RightCurly, Error::InvalidPattern);
|
consume(TokenType::RightCurly, Error::InvalidPattern);
|
||||||
|
|
||||||
if (auto property = Unicode::property_from_string(property_name); property.has_value())
|
if (property_type.is_empty()) {
|
||||||
return { *property };
|
if (auto property = Unicode::property_from_string(property_name); property.has_value())
|
||||||
if (auto general_category = Unicode::general_category_from_string(property_name); general_category.has_value())
|
return { *property };
|
||||||
return { *general_category };
|
if (auto general_category = Unicode::general_category_from_string(property_name); general_category.has_value())
|
||||||
|
return { *general_category };
|
||||||
|
} else if ((property_type == "General_Category"sv) || (property_type == "gc"sv)) {
|
||||||
|
if (auto general_category = Unicode::general_category_from_string(property_name); general_category.has_value())
|
||||||
|
return { *general_category };
|
||||||
|
}
|
||||||
|
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue