1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-31 05:38:11 +00:00

LibRegex: Add basic support for unicode escapes in ECMA262Parser

This parses Unicode escapes (and matches them only against UTF-8 strings).
This commit is contained in:
AnotherTest 2020-12-06 17:04:28 +03:30 committed by Andreas Kling
parent 86811683b0
commit 765d2977bc
2 changed files with 31 additions and 4 deletions

View file

@ -1066,8 +1066,24 @@ bool ECMA262Parser::parse_atom_escape(ByteCode& stack, size_t& match_length_mini
}
if (try_skip("u")) {
// FIXME: Implement this path, unicode escape sequence.
TODO();
if (auto code_point = read_digits(ReadDigitsInitialZeroState::Allow, ReadDigitFollowPolicy::Any, true, 4); code_point.has_value()) {
// FIXME: The minimum length depends on the mode - should be utf8-length in u8 mode.
match_length_minimum += 1;
StringBuilder builder;
builder.append_code_point(code_point.value());
// FIXME: This isn't actually correct for ECMAScript.
auto u8_encoded = builder.string_view();
stack.insert_bytecode_compare_string(u8_encoded);
return true;
} else if (!unicode) {
// '\u' is allowed in non-unicode mode, just matches 'u'.
match_length_minimum += 1;
stack.insert_bytecode_compare_values({ { CharacterCompareType::Char, (ByteCodeValueType)'u' } });
return true;
} else {
set_error(Error::InvalidPattern);
return false;
}
}
// IdentityEscape
@ -1261,8 +1277,16 @@ bool ECMA262Parser::parse_nonempty_class_ranges(Vector<CompareTypeAndValuePair>&
}
if (try_skip("u")) {
// FIXME: Implement this path, unicode escape sequence.
TODO();
if (auto code_point = read_digits(ReadDigitsInitialZeroState::Allow, ReadDigitFollowPolicy::Any, true, 4); code_point.has_value()) {
// FIXME: While codepoint ranges are supported, codepoint matches as "Char" are not!
return { { .code_point = code_point.value(), .is_character_class = false } };
} else if (!unicode) {
// '\u' is allowed in non-unicode mode, just matches 'u'.
return { { .code_point = 'u', .is_character_class = false } };
} else {
set_error(Error::InvalidPattern);
return {};
}
}
if (unicode) {