1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-26 23:17:46 +00:00

LibVT: Handle UTF-8 search strings in find

Instead of iterating through the needle being searched one byte at a
time (like an ASCII string), we calculate its Unicode code points first
and then iterate through those.
This commit is contained in:
Arda Cinar 2023-01-10 13:32:38 +03:00 committed by Andreas Kling
parent c5360b1a5f
commit 25f1e81d62

View file

@ -671,15 +671,27 @@ VT::Range TerminalWidget::find_next(StringView needle, const VT::Position& start
VT::Position start_of_potential_match;
size_t needle_index = 0;
Utf8View unicode_needle(needle);
Vector<u32> needle_code_points;
for (u32 code_point : unicode_needle)
needle_code_points.append(code_point);
do {
auto ch = code_point_at(position);
// FIXME: This is not the right way to use a Unicode needle!
auto needle_ch = (u32)needle[needle_index];
if (case_sensitivity ? ch == needle_ch : to_lowercase_code_point(ch) == to_lowercase_code_point(needle_ch)) {
bool code_point_matches = false;
if (needle_index >= needle_code_points.size())
code_point_matches = false;
else if (case_sensitivity)
code_point_matches = ch == needle_code_points[needle_index];
else
code_point_matches = to_lowercase_code_point(ch) == to_lowercase_code_point(needle_code_points[needle_index]);
if (code_point_matches) {
if (needle_index == 0)
start_of_potential_match = position;
++needle_index;
if (needle_index >= needle.length())
if (needle_index >= needle_code_points.size())
return { start_of_potential_match, position };
} else {
if (needle_index > 0)
@ -700,23 +712,35 @@ VT::Range TerminalWidget::find_previous(StringView needle, const VT::Position& s
VT::Position position = start.is_valid() ? start : VT::Position(m_terminal.line_count() - 1, m_terminal.line(m_terminal.line_count() - 1).length() - 1);
VT::Position original_position = position;
Utf8View unicode_needle(needle);
Vector<u32> needle_code_points;
for (u32 code_point : unicode_needle)
needle_code_points.append(code_point);
VT::Position end_of_potential_match;
size_t needle_index = needle.length() - 1;
size_t needle_index = needle_code_points.size() - 1;
do {
auto ch = code_point_at(position);
// FIXME: This is not the right way to use a Unicode needle!
auto needle_ch = (u32)needle[needle_index];
if (case_sensitivity ? ch == needle_ch : to_lowercase_code_point(ch) == to_lowercase_code_point(needle_ch)) {
if (needle_index == needle.length() - 1)
bool code_point_matches = false;
if (needle_index >= needle_code_points.size())
code_point_matches = false;
else if (case_sensitivity)
code_point_matches = ch == needle_code_points[needle_index];
else
code_point_matches = to_lowercase_code_point(ch) == to_lowercase_code_point(needle_code_points[needle_index]);
if (code_point_matches) {
if (needle_index == needle_code_points.size() - 1)
end_of_potential_match = position;
if (needle_index == 0)
return { position, end_of_potential_match };
--needle_index;
} else {
if (needle_index < needle.length() - 1)
if (needle_index < needle_code_points.size() - 1)
position = end_of_potential_match;
needle_index = needle.length() - 1;
needle_index = needle_code_points.size() - 1;
}
position = previous_position_before(position, should_wrap);
} while (position.is_valid() && position != original_position);