AK+LibUnicode: Implement String::equals_ignoring_case without allocating

Previously, we fully casefolded both the left- and right-hand sides (allocating a new string for each) to compare two strings case-insensitively. Now, we casefold one code point at a time, storing the result in a view for comparison, until we exhaust both strings.
2025-07-02 12:42:13 +00:00 · 2023-03-08 13:03:04 -05:00 · 2023-03-08 13:03:04 -05:00 · 1393ed2000
commit 1393ed2000
parent 4aee4e80bd
6 changed files with 150 additions and 52 deletions
--- a/Userland/Libraries/LibUnicode/UnicodeUtils.cpp
+++ b/Userland/Libraries/LibUnicode/UnicodeUtils.cpp
@ -330,31 +330,32 @@ ErrorOr<void> build_titlecase_string([[maybe_unused]] Utf8View code_points, [[ma
 }

 // https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G53253
-ErrorOr<void> build_casefold_string([[maybe_unused]] Utf8View code_points, [[maybe_unused]] StringBuilder& builder)
+ErrorOr<void> build_casefold_string(Utf8View code_points, StringBuilder& builder)
 {
-#if ENABLE_UNICODE_DATA
    // toCasefold(X): Map each character C in X to Case_Folding(C).
-    //
-    // Case_Folding(C) uses the mappings with the status field value “C” or “F” in the data file
-    // CaseFolding.txt in the Unicode Character Database.
-
-    using enum CaseFoldingStatus;
-
    for (auto code_point : code_points) {
-        auto const* case_folding = find_matching_case_folding<Common, Full>(code_point);
-        if (!case_folding) {
-            TRY(builder.try_append_code_point(code_point));
-            continue;
-        }
-
-        for (size_t i = 0; i < case_folding->mapping_size; ++i)
-            TRY(builder.try_append_code_point(case_folding->mapping[i]));
+        auto case_folding = casefold_code_point(code_point);
+        TRY(builder.try_append(case_folding));
    }

    return {};
-#else
-    return Error::from_string_literal("Unicode data has been disabled");
+}
+
+// https://www.unicode.org/reports/tr44/#CaseFolding.txt
+// https://www.unicode.org/versions/Unicode15.0.0/ch03.pdf#G53253
+Utf32View casefold_code_point(u32 const& code_point)
+{
+#if ENABLE_UNICODE_DATA
+    // Case_Folding(C) uses the mappings with the status field value “C” or “F” in the data file
+    // CaseFolding.txt in the Unicode Character Database.
+    using enum CaseFoldingStatus;
+
+    if (auto const* case_folding = find_matching_case_folding<Common, Full>(code_point))
+        return Utf32View { case_folding->mapping, case_folding->mapping_size };
 #endif
+
+    // Case foldings are omitted from the data file when they equal the code point itself, so return a view of the caller's code point (which must outlive the returned view).
+    return Utf32View { &code_point, 1 };
 }

 }