From a72bb34970e5f1f0ed3d28790431e1df37b7dac7 Mon Sep 17 00:00:00 2001
From: Max Wipfli
Date: Tue, 18 May 2021 16:09:20 +0200
Subject: [PATCH] AK: Add Utf8View::iterator_at_byte_offset method

This implements a method to get a Utf8CodepointIterator at a specified
byte offset.
---

Notes:
    Reviewer documentation for this patch. It sits after the three-dash
    line and before the diff, so it is not part of the commit message.

    Utf8View::iterator_at_byte_offset(byte_offset), as added below:
      * Walks the view from begin(), keeping a running byte offset that
        is advanced by code_point_length_in_bytes() of each code point.
      * Returns the first iterator whose code point starts at a byte
        offset >= byte_offset. An offset that falls inside a multi-byte
        code point therefore yields the iterator of the FOLLOWING code
        point, not the one that contains the offset.
      * Returns end() when no code point starts at or after byte_offset
        (this includes an empty view and any offset at or past the last
        code point's end).
      * The search is a forward scan from the start of the view on every
        call; nothing is cached between calls.

    NOTE(review): line breaks, blank context lines and indentation in
    this copy were reconstructed from a whitespace-collapsed paste. The
    hunk line counts (6 -> 17 and 6 -> 7) are consistent with the layout
    below, but confirm against the original commit before applying.

 AK/Utf8View.cpp | 11 +++++++++++
 AK/Utf8View.h   |  1 +
 2 files changed, 12 insertions(+)

diff --git a/AK/Utf8View.cpp b/AK/Utf8View.cpp
index 02944a42a8..0826eb0503 100644
--- a/AK/Utf8View.cpp
+++ b/AK/Utf8View.cpp
@@ -46,6 +46,17 @@ Utf8CodepointIterator Utf8View::end() const
     return { end_ptr(), 0 };
 }
 
+Utf8CodepointIterator Utf8View::iterator_at_byte_offset(size_t byte_offset) const
+{
+    size_t current_offset = 0;
+    for (auto iterator = begin(); !iterator.done(); ++iterator) {
+        if (current_offset >= byte_offset)
+            return iterator;
+        current_offset += iterator.code_point_length_in_bytes();
+    }
+    return end();
+}
+
 size_t Utf8View::byte_offset_of(const Utf8CodepointIterator& it) const
 {
     VERIFY(it.m_ptr >= begin_ptr());
diff --git a/AK/Utf8View.h b/AK/Utf8View.h
index d9239e6b62..7324bd2984 100644
--- a/AK/Utf8View.h
+++ b/AK/Utf8View.h
@@ -54,6 +54,7 @@ public:
 
     Utf8CodepointIterator begin() const;
     Utf8CodepointIterator end() const;
+    Utf8CodepointIterator iterator_at_byte_offset(size_t) const;
 
     const unsigned char* bytes() const { return begin_ptr(); }
     size_t byte_length() const { return m_string.length(); }