1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-31 10:48:11 +00:00

AK: Track byte length, rather than code point length, in Utf8View::trim

Utf8View::trim uses Utf8View::substring_view to return its result, which
takes its arguments as a byte offset and byte length rather than as a
code point offset and code point length.
This commit is contained in:
Timothy Flynn 2021-07-16 12:40:46 -04:00 committed by Linus Groh
parent 660a8982e7
commit 87848cdf7d
3 changed files with 49 additions and 8 deletions

View file

@@ -185,24 +185,24 @@ bool Utf8View::contains(u32 needle) const
Utf8View Utf8View::trim(const Utf8View& characters, TrimMode mode) const
{
size_t substring_start = 0;
size_t substring_length = length();
size_t substring_length = byte_length();
if (mode == TrimMode::Left || mode == TrimMode::Both) {
for (auto code_point : *this) {
for (auto code_point = begin(); code_point != end(); ++code_point) {
if (substring_length == 0)
return {};
if (!characters.contains(code_point))
if (!characters.contains(*code_point))
break;
++substring_start;
--substring_length;
substring_start += code_point.underlying_code_point_length_in_bytes();
substring_length -= code_point.underlying_code_point_length_in_bytes();
}
}
if (mode == TrimMode::Right || mode == TrimMode::Both) {
size_t seen_whitespace_length = 0;
for (auto code_point : *this) {
if (characters.contains(code_point))
seen_whitespace_length++;
for (auto code_point = begin(); code_point != end(); ++code_point) {
if (characters.contains(*code_point))
seen_whitespace_length += code_point.underlying_code_point_length_in_bytes();
else
seen_whitespace_length = 0;
}