diff --git a/Tests/LibRegex/Regex.cpp b/Tests/LibRegex/Regex.cpp index 05be810b76..76b217b98f 100644 --- a/Tests/LibRegex/Regex.cpp +++ b/Tests/LibRegex/Regex.cpp @@ -757,6 +757,18 @@ TEST_CASE(ECMA262_unicode_match) { "(?<\\ud835\\udcd1\\ud835\\udcfb\\ud835\\udcf8\\ud835\\udd00\\ud835\\udcf7>brown)"sv, "brown"sv, true, ECMAScriptFlags::Unicode }, { "^\\s+$"sv, space_and_line_terminators }, { "^\\s+$"sv, space_and_line_terminators, true, ECMAScriptFlags::Unicode }, + { "[\\u0390]"sv, "\u1fd3"sv, false, ECMAScriptFlags::Unicode }, + { "[\\u1fd3]"sv, "\u0390"sv, false, ECMAScriptFlags::Unicode }, + { "[\\u0390]"sv, "\u1fd3"sv, true, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::Insensitive) }, + { "[\\u1fd3]"sv, "\u0390"sv, true, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::Insensitive) }, + { "[\\u03b0]"sv, "\u1fe3"sv, false, ECMAScriptFlags::Unicode }, + { "[\\u1fe3]"sv, "\u03b0"sv, false, ECMAScriptFlags::Unicode }, + { "[\\u03b0]"sv, "\u1fe3"sv, true, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::Insensitive) }, + { "[\\u1fe3]"sv, "\u03b0"sv, true, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::Insensitive) }, + { "[\\ufb05]"sv, "\ufb06"sv, false, ECMAScriptFlags::Unicode }, + { "[\\ufb06]"sv, "\ufb05"sv, false, ECMAScriptFlags::Unicode }, + { "[\\ufb05]"sv, "\ufb06"sv, true, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::Insensitive) }, + { "[\\ufb06]"sv, "\ufb05"sv, true, combine_flags(ECMAScriptFlags::Unicode, ECMAScriptFlags::Insensitive) }, }; for (auto& test : tests) { diff --git a/Userland/Libraries/LibRegex/RegexByteCode.cpp b/Userland/Libraries/LibRegex/RegexByteCode.cpp index be4f609bff..c5ecf5d726 100644 --- a/Userland/Libraries/LibRegex/RegexByteCode.cpp +++ b/Userland/Libraries/LibRegex/RegexByteCode.cpp @@ -701,12 +701,19 @@ ALWAYS_INLINE void OpCode_Compare::compare_char(MatchInput const& input, MatchSt return; // FIXME: Figure out how to do this if unicode() without performing a substring split first. - auto input_view = input.view.unicode() ? input.view.substring_view(state.string_position, 1)[0] : input.view.code_unit_at(state.string_position_in_code_units); + auto input_view = input.view.unicode() + ? input.view.substring_view(state.string_position, 1)[0] + : input.view.code_unit_at(state.string_position_in_code_units); + bool equal; - if (input.regex_options & AllFlags::Insensitive) - equal = to_ascii_lowercase(input_view) == to_ascii_lowercase(ch1); // FIXME: Implement case-insensitive matching for non-ascii characters - else + if (input.regex_options & AllFlags::Insensitive) { + if (input.view.unicode()) + equal = Unicode::equals_ignoring_case(Utf32View { &input_view, 1 }, Utf32View { &ch1, 1 }); + else + equal = to_ascii_lowercase(input_view) == to_ascii_lowercase(ch1); + } else { equal = input_view == ch1; + } if (equal) { if (inverse)