From 0e6375558dc60811a390ee014d76486ff28d8746 Mon Sep 17 00:00:00 2001
From: Timothy Flynn
Date: Wed, 21 Jul 2021 16:38:12 -0400
Subject: [PATCH] AK+LibRegex: Partially implement case insensitive UTF-16 comparison

This will work for ASCII code points. Unicode case folding will be needed for
non-ASCII.
---
// Reviewer notes (free text between the "---" separator and the diffstat; it is
// not part of the commit message):
// - Utf16View::equals_ignoring_case() returns true only when both views hold
//   the same number of UTF-16 code units and every pair of code units compares
//   equal after to_ascii_lowercase(). An empty view equals only an empty view.
// - The loop indexes m_code_units directly, so the comparison is made per code
//   unit rather than per code point. As the FIXME states, non-ASCII case
//   insensitive comparison is not handled yet.
// - RegexMatch.h adds a [&](Utf16View view) handler next to the existing ones.
//   It visits other.m_view and delegates Utf16View-vs-Utf16View to
//   equals_ignoring_case(); every other alternative still reaches TODO().
// - NOTE(review): the "From:" address and the #include targets are missing in
//   this copy. The added include is presumably the header that declares
//   to_ascii_lowercase() -- confirm against the upstream commit.
// - NOTE(review): line breaks and indentation were restored by hand from a
//   flattened copy -- verify whitespace against the upstream commit before
//   applying.
 AK/Utf16View.cpp                         | 17 +++++++++++++++++
 AK/Utf16View.h                           |  2 ++
 Userland/Libraries/LibRegex/RegexMatch.h |  5 +++++
 3 files changed, 24 insertions(+)

diff --git a/AK/Utf16View.cpp b/AK/Utf16View.cpp
index a784822167..44a2a89f73 100644
--- a/AK/Utf16View.cpp
+++ b/AK/Utf16View.cpp
@@ -4,6 +4,7 @@
  * SPDX-License-Identifier: BSD-2-Clause
  */
 
+#include
 #include
 #include
 #include
@@ -214,6 +215,22 @@ bool Utf16View::operator==(Utf16View const& other) const
     return true;
 }
 
+bool Utf16View::equals_ignoring_case(Utf16View const& other) const
+{
+    if (length_in_code_units() == 0)
+        return other.length_in_code_units() == 0;
+    if (length_in_code_units() != other.length_in_code_units())
+        return false;
+
+    for (size_t i = 0; i < length_in_code_units(); ++i) {
+        // FIXME: Handle non-ASCII case insensitive comparisons.
+        if (to_ascii_lowercase(m_code_units[i]) != to_ascii_lowercase(other.m_code_units[i]))
+            return false;
+    }
+
+    return true;
+}
+
 Utf16CodePointIterator& Utf16CodePointIterator::operator++()
 {
     size_t code_units = length_in_code_units();
diff --git a/AK/Utf16View.h b/AK/Utf16View.h
index fc1c4ca87d..5f58c12036 100644
--- a/AK/Utf16View.h
+++ b/AK/Utf16View.h
@@ -104,6 +104,8 @@ public:
         return validate(valid_code_units);
     }
 
+    bool equals_ignoring_case(Utf16View const&) const;
+
 private:
     u16 const* begin_ptr() const { return m_code_units.data(); }
     u16 const* end_ptr() const { return begin_ptr() + m_code_units.size(); }
diff --git a/Userland/Libraries/LibRegex/RegexMatch.h b/Userland/Libraries/LibRegex/RegexMatch.h
index 6bc58ad78f..4ed5cab2f0 100644
--- a/Userland/Libraries/LibRegex/RegexMatch.h
+++ b/Userland/Libraries/LibRegex/RegexMatch.h
@@ -335,6 +335,11 @@ public:
                     [&](StringView other_view) { return view.equals_ignoring_case(other_view); },
                     [](auto&) -> bool { TODO(); });
             },
+            [&](Utf16View view) {
+                return other.m_view.visit(
+                    [&](Utf16View other_view) { return view.equals_ignoring_case(other_view); },
+                    [](auto&) -> bool { TODO(); });
+            },
             [](auto&) -> bool { TODO(); });
     }
 