1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-25 15:17:36 +00:00

LibRegex: Support property escapes of Unicode script extensions

This commit is contained in:
Timothy Flynn 2021-08-04 07:26:25 -04:00 committed by Linus Groh
parent 5edd458420
commit 484ccfadc3
5 changed files with 51 additions and 7 deletions

View file

@@ -1556,8 +1556,11 @@ bool ECMA262Parser::parse_atom_escape(ByteCode& stack, size_t& match_length_mini
[&](Unicode::GeneralCategory general_category) {
compares.empend(CompareTypeAndValuePair { CharacterCompareType::GeneralCategory, (ByteCodeValueType)general_category });
},
[&](Unicode::Script script) {
compares.empend(CompareTypeAndValuePair { CharacterCompareType::Script, (ByteCodeValueType)script });
[&](Script script) {
if (script.is_extension)
compares.empend(CompareTypeAndValuePair { CharacterCompareType::ScriptExtension, (ByteCodeValueType)script.script });
else
compares.empend(CompareTypeAndValuePair { CharacterCompareType::Script, (ByteCodeValueType)script.script });
});
stack.insert_bytecode_compare_values(move(compares));
match_length_minimum += 1;
@@ -1716,6 +1719,7 @@ struct CharClassRangeElement {
bool is_property { false };
bool is_general_category { false };
bool is_script { false };
bool is_script_extension { false };
};
bool ECMA262Parser::parse_nonempty_class_ranges(Vector<CompareTypeAndValuePair>& ranges, bool unicode)
@@ -1810,8 +1814,11 @@ bool ECMA262Parser::parse_nonempty_class_ranges(Vector<CompareTypeAndValuePair>&
[&](Unicode::GeneralCategory general_category) {
return CharClassRangeElement { .general_category = general_category, .is_negated = negated, .is_character_class = true, .is_general_category = true };
},
[&](Unicode::Script script) {
return CharClassRangeElement { .script = script, .is_negated = negated, .is_character_class = true, .is_script = true };
[&](Script script) {
if (script.is_extension)
return CharClassRangeElement { .script = script.script, .is_negated = negated, .is_character_class = true, .is_script_extension = true };
else
return CharClassRangeElement { .script = script.script, .is_negated = negated, .is_character_class = true, .is_script = true };
});
}
}
@@ -1861,6 +1868,8 @@ bool ECMA262Parser::parse_nonempty_class_ranges(Vector<CompareTypeAndValuePair>&
ranges.empend(CompareTypeAndValuePair { CharacterCompareType::GeneralCategory, (ByteCodeValueType)(atom.general_category) });
else if (atom.is_script)
ranges.empend(CompareTypeAndValuePair { CharacterCompareType::Script, (ByteCodeValueType)(atom.script) });
else if (atom.is_script_extension)
ranges.empend(CompareTypeAndValuePair { CharacterCompareType::ScriptExtension, (ByteCodeValueType)(atom.script) });
else
ranges.empend(CompareTypeAndValuePair { CharacterCompareType::CharClass, (ByteCodeValueType)atom.character_class });
} else {
@@ -1960,7 +1969,7 @@ bool ECMA262Parser::parse_unicode_property_escape(PropertyEscape& property, bool
return true;
},
[](Unicode::GeneralCategory) { return true; },
[](Unicode::Script) { return true; });
[](Script) { return true; });
}
StringView ECMA262Parser::read_capture_group_specifier(bool take_starting_angle_bracket)
@@ -2026,7 +2035,10 @@ Optional<ECMA262Parser::PropertyEscape> ECMA262Parser::read_unicode_property_esc
return { *general_category };
} else if ((property_type == "Script"sv) || (property_type == "sc"sv)) {
if (auto script = Unicode::script_from_string(property_name); script.has_value())
return { *script };
return Script { *script, false };
} else if ((property_type == "Script_Extensions"sv) || (property_type == "scx"sv)) {
if (auto script = Unicode::script_from_string(property_name); script.has_value())
return Script { *script, true };
}
return {};