mirror of
https://github.com/RGBCube/serenity
synced 2025-07-26 23:17:46 +00:00
LibVT: Handle UTF-8 search strings in find
Instead of iterating through the needle being searched one byte at a time (as if it were an ASCII string), we first decode it into Unicode code points and then iterate through those.
This commit is contained in:
parent
c5360b1a5f
commit
25f1e81d62
1 changed files with 35 additions and 11 deletions
|
@ -671,15 +671,27 @@ VT::Range TerminalWidget::find_next(StringView needle, const VT::Position& start
|
|||
VT::Position start_of_potential_match;
|
||||
size_t needle_index = 0;
|
||||
|
||||
Utf8View unicode_needle(needle);
|
||||
Vector<u32> needle_code_points;
|
||||
for (u32 code_point : unicode_needle)
|
||||
needle_code_points.append(code_point);
|
||||
|
||||
do {
|
||||
auto ch = code_point_at(position);
|
||||
// FIXME: This is not the right way to use a Unicode needle!
|
||||
auto needle_ch = (u32)needle[needle_index];
|
||||
if (case_sensitivity ? ch == needle_ch : to_lowercase_code_point(ch) == to_lowercase_code_point(needle_ch)) {
|
||||
|
||||
bool code_point_matches = false;
|
||||
if (needle_index >= needle_code_points.size())
|
||||
code_point_matches = false;
|
||||
else if (case_sensitivity)
|
||||
code_point_matches = ch == needle_code_points[needle_index];
|
||||
else
|
||||
code_point_matches = to_lowercase_code_point(ch) == to_lowercase_code_point(needle_code_points[needle_index]);
|
||||
|
||||
if (code_point_matches) {
|
||||
if (needle_index == 0)
|
||||
start_of_potential_match = position;
|
||||
++needle_index;
|
||||
if (needle_index >= needle.length())
|
||||
if (needle_index >= needle_code_points.size())
|
||||
return { start_of_potential_match, position };
|
||||
} else {
|
||||
if (needle_index > 0)
|
||||
|
@ -700,23 +712,35 @@ VT::Range TerminalWidget::find_previous(StringView needle, const VT::Position& s
|
|||
VT::Position position = start.is_valid() ? start : VT::Position(m_terminal.line_count() - 1, m_terminal.line(m_terminal.line_count() - 1).length() - 1);
|
||||
VT::Position original_position = position;
|
||||
|
||||
Utf8View unicode_needle(needle);
|
||||
Vector<u32> needle_code_points;
|
||||
for (u32 code_point : unicode_needle)
|
||||
needle_code_points.append(code_point);
|
||||
|
||||
VT::Position end_of_potential_match;
|
||||
size_t needle_index = needle.length() - 1;
|
||||
size_t needle_index = needle_code_points.size() - 1;
|
||||
|
||||
do {
|
||||
auto ch = code_point_at(position);
|
||||
// FIXME: This is not the right way to use a Unicode needle!
|
||||
auto needle_ch = (u32)needle[needle_index];
|
||||
if (case_sensitivity ? ch == needle_ch : to_lowercase_code_point(ch) == to_lowercase_code_point(needle_ch)) {
|
||||
if (needle_index == needle.length() - 1)
|
||||
|
||||
bool code_point_matches = false;
|
||||
if (needle_index >= needle_code_points.size())
|
||||
code_point_matches = false;
|
||||
else if (case_sensitivity)
|
||||
code_point_matches = ch == needle_code_points[needle_index];
|
||||
else
|
||||
code_point_matches = to_lowercase_code_point(ch) == to_lowercase_code_point(needle_code_points[needle_index]);
|
||||
|
||||
if (code_point_matches) {
|
||||
if (needle_index == needle_code_points.size() - 1)
|
||||
end_of_potential_match = position;
|
||||
if (needle_index == 0)
|
||||
return { position, end_of_potential_match };
|
||||
--needle_index;
|
||||
} else {
|
||||
if (needle_index < needle.length() - 1)
|
||||
if (needle_index < needle_code_points.size() - 1)
|
||||
position = end_of_potential_match;
|
||||
needle_index = needle.length() - 1;
|
||||
needle_index = needle_code_points.size() - 1;
|
||||
}
|
||||
position = previous_position_before(position, should_wrap);
|
||||
} while (position.is_valid() && position != original_position);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue