1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-31 05:18:12 +00:00

AK+LibUnicode: Implement String::equals_ignoring_case without allocating

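We currently fully casefold both the left- and right-hand sides in order to
compare two strings case-insensitively, which allocates two temporary strings.
Now, we casefold one code point at a time on each side, storing the result in
a view for comparison, and stop at the first mismatch or once either string is
exhausted; the strings match only if both are exhausted at the same time.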
This commit is contained in:
Timothy Flynn 2023-03-08 13:03:04 -05:00 committed by Linus Groh
parent 4aee4e80bd
commit 1393ed2000
6 changed files with 150 additions and 52 deletions

View file

@ -6,6 +6,8 @@
#include <AK/String.h>
#include <AK/StringBuilder.h>
#include <AK/Utf32View.h>
#include <AK/Utf8View.h>
#include <LibUnicode/UnicodeUtils.h>
// This file contains definitions of AK::String methods which require UCD data.
@ -40,12 +42,60 @@ ErrorOr<String> String::to_casefold() const
return builder.to_string();
}
class CasefoldStringComparator {
public:
explicit CasefoldStringComparator(Utf8View string)
: m_string(string)
, m_it(m_string.begin())
{
}
bool has_more_data() const
{
return !m_casefolded_code_points.is_empty() || (m_it != m_string.end());
}
u32 next_code_point()
{
VERIFY(has_more_data());
if (m_casefolded_code_points.is_empty()) {
m_current_code_point = *m_it;
++m_it;
m_casefolded_code_points = Unicode::Detail::casefold_code_point(m_current_code_point);
VERIFY(!m_casefolded_code_points.is_empty()); // Must at least contain the provided code point.
}
auto code_point = m_casefolded_code_points[0];
m_casefolded_code_points = m_casefolded_code_points.substring_view(1);
return code_point;
}
private:
Utf8View m_string;
Utf8CodePointIterator m_it;
u32 m_current_code_point { 0 };
Utf32View m_casefolded_code_points;
};
// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G34145
ErrorOr<bool> String::equals_ignoring_case(String const& other) const
bool String::equals_ignoring_case(String const& other) const
{
// A string X is a caseless match for a string Y if and only if:
// toCasefold(X) = toCasefold(Y)
return TRY(to_casefold()) == TRY(other.to_casefold());
CasefoldStringComparator lhs { code_points() };
CasefoldStringComparator rhs { other.code_points() };
while (lhs.has_more_data() && rhs.has_more_data()) {
if (lhs.next_code_point() != rhs.next_code_point())
return false;
}
return !lhs.has_more_data() && !rhs.has_more_data();
}
}