From c4ee57653128d30ede233907ee1116e3713ec6ff Mon Sep 17 00:00:00 2001
From: Timothy Flynn
Date: Mon, 16 Aug 2021 10:27:26 -0400
Subject: [PATCH] AK: Add Utf8View::byte_offset_of overload for code point index lookups

---
 AK/Utf8View.cpp | 15 +++++++++++++++
 AK/Utf8View.h   |  1 +
 2 files changed, 16 insertions(+)

diff --git a/AK/Utf8View.cpp b/AK/Utf8View.cpp
index 5a1b665e37..4d2f5d62e1 100644
--- a/AK/Utf8View.cpp
+++ b/AK/Utf8View.cpp
@@ -65,6 +65,21 @@ size_t Utf8View::byte_offset_of(const Utf8CodePointIterator& it) const
     return it.m_ptr - begin_ptr();
 }
 
+// Maps a code point index to the byte offset at which that code point begins.
+// If iteration finishes first, the byte lengths summed so far are returned.
+size_t Utf8View::byte_offset_of(size_t code_point_offset) const
+{
+    size_t offset_in_bytes = 0;
+    auto cursor = begin();
+
+    while (code_point_offset != 0 && !cursor.done()) {
+        offset_in_bytes += cursor.underlying_code_point_length_in_bytes();
+        ++cursor;
+        --code_point_offset;
+    }
+    return offset_in_bytes;
+}
+
 Utf8View Utf8View::substring_view(size_t byte_offset, size_t byte_length) const
 {
     StringView string = m_string.substring_view(byte_offset, byte_length);
diff --git a/AK/Utf8View.h b/AK/Utf8View.h
index 5832c6c9fc..306634de85 100644
--- a/AK/Utf8View.h
+++ b/AK/Utf8View.h
@@ -67,6 +67,7 @@ public:
     const unsigned char* bytes() const { return begin_ptr(); }
     size_t byte_length() const { return m_string.length(); }
     size_t byte_offset_of(const Utf8CodePointIterator&) const;
+    size_t byte_offset_of(size_t code_point_offset) const;
     Utf8View substring_view(size_t byte_offset, size_t byte_length) const;
     Utf8View substring_view(size_t byte_offset) const { return substring_view(byte_offset, byte_length() - byte_offset); }
 