mirror of
https://github.com/RGBCube/serenity
synced 2025-07-25 13:57:35 +00:00
LibRegex+Everywhere: Make LibRegex more unicode-aware
This commit makes LibRegex (mostly) capable of operating on any of the three main string views: - StringView for raw strings - Utf8View for utf-8 encoded strings - Utf32View for raw unicode strings As a result, regexps with unicode strings should be able to properly handle utf-8 and not stop in the middle of a code point. A future commit will update LibJS to use the correct type of string depending on the flags.
This commit is contained in:
parent
e5af15a6e9
commit
f364fcec5d
8 changed files with 310 additions and 207 deletions
|
@ -249,7 +249,7 @@ TEST_CASE(char_utf8)
|
|||
Regex<PosixExtended> re("😀");
|
||||
RegexResult result;
|
||||
|
||||
EXPECT_EQ((result = match("Привет, мир! 😀 γειά σου κόσμος 😀 こんにちは世界", re, PosixFlags::Global)).success, true);
|
||||
EXPECT_EQ((result = match(Utf8View { "Привет, мир! 😀 γειά σου κόσμος 😀 こんにちは世界" }, re, PosixFlags::Global)).success, true);
|
||||
EXPECT_EQ(result.count, 2u);
|
||||
}
|
||||
|
||||
|
@ -312,7 +312,6 @@ TEST_CASE(match_all_character_class)
|
|||
EXPECT_EQ(result.matches.at(0).view, "W");
|
||||
EXPECT_EQ(result.matches.at(1).view, "i");
|
||||
EXPECT_EQ(result.matches.at(2).view, "n");
|
||||
EXPECT(&result.matches.at(0).view.characters_without_null_termination()[0] != &str.view().characters_without_null_termination()[1]);
|
||||
}
|
||||
|
||||
TEST_CASE(match_character_class_with_assertion)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue