LibVT: Handle UTF-8 search strings in find

Instead of iterating through the search needle one byte at a time (as if it were an ASCII string), we first decode it into its Unicode code points and then iterate through those.
2025-07-27 08:07:34 +00:00 · 2023-01-10 13:32:38 +03:00 · 2023-01-10 13:32:38 +03:00 · 25f1e81d62
commit 25f1e81d62
parent c5360b1a5f
1 changed files with 35 additions and 11 deletions
--- a/Userland/Libraries/LibVT/TerminalWidget.cpp
+++ b/Userland/Libraries/LibVT/TerminalWidget.cpp
@ -671,15 +671,27 @@ VT::Range TerminalWidget::find_next(StringView needle, const VT::Position& start
    VT::Position start_of_potential_match;
    size_t needle_index = 0;

+    Utf8View unicode_needle(needle);
+    Vector<u32> needle_code_points;
+    for (u32 code_point : unicode_needle)
+        needle_code_points.append(code_point);
+
    do {
        auto ch = code_point_at(position);
-        // FIXME: This is not the right way to use a Unicode needle!
-        auto needle_ch = (u32)needle[needle_index];
-        if (case_sensitivity ? ch == needle_ch : to_lowercase_code_point(ch) == to_lowercase_code_point(needle_ch)) {
+
+        bool code_point_matches = false;
+        if (needle_index >= needle_code_points.size())
+            code_point_matches = false;
+        else if (case_sensitivity)
+            code_point_matches = ch == needle_code_points[needle_index];
+        else
+            code_point_matches = to_lowercase_code_point(ch) == to_lowercase_code_point(needle_code_points[needle_index]);
+
+        if (code_point_matches) {
            if (needle_index == 0)
                start_of_potential_match = position;
            ++needle_index;
-            if (needle_index >= needle.length())
+            if (needle_index >= needle_code_points.size())
                return { start_of_potential_match, position };
        } else {
            if (needle_index > 0)
@ -700,23 +712,35 @@ VT::Range TerminalWidget::find_previous(StringView needle, const VT::Position& s
    VT::Position position = start.is_valid() ? start : VT::Position(m_terminal.line_count() - 1, m_terminal.line(m_terminal.line_count() - 1).length() - 1);
    VT::Position original_position = position;

+    Utf8View unicode_needle(needle);
+    Vector<u32> needle_code_points;
+    for (u32 code_point : unicode_needle)
+        needle_code_points.append(code_point);
+
    VT::Position end_of_potential_match;
-    size_t needle_index = needle.length() - 1;
+    size_t needle_index = needle_code_points.size() - 1;

    do {
        auto ch = code_point_at(position);
-        // FIXME: This is not the right way to use a Unicode needle!
-        auto needle_ch = (u32)needle[needle_index];
-        if (case_sensitivity ? ch == needle_ch : to_lowercase_code_point(ch) == to_lowercase_code_point(needle_ch)) {
-            if (needle_index == needle.length() - 1)
+
+        bool code_point_matches = false;
+        if (needle_index >= needle_code_points.size())
+            code_point_matches = false;
+        else if (case_sensitivity)
+            code_point_matches = ch == needle_code_points[needle_index];
+        else
+            code_point_matches = to_lowercase_code_point(ch) == to_lowercase_code_point(needle_code_points[needle_index]);
+
+        if (code_point_matches) {
+            if (needle_index == needle_code_points.size() - 1)
                end_of_potential_match = position;
            if (needle_index == 0)
                return { position, end_of_potential_match };
            --needle_index;
        } else {
-            if (needle_index < needle.length() - 1)
+            if (needle_index < needle_code_points.size() - 1)
                position = end_of_potential_match;
-            needle_index = needle.length() - 1;
+            needle_index = needle_code_points.size() - 1;
        }
        position = previous_position_before(position, should_wrap);
    } while (position.is_valid() && position != original_position);