From e7ba03ddd109dda3f0d1cdac4a8053a4753bfbf0 Mon Sep 17 00:00:00 2001
From: Andreas Kling
Date: Thu, 24 Nov 2022 14:57:20 +0100
Subject: [PATCH] AK: Add Utf8View::iterator_at_byte_offset_without_validation()

Unlike iterator_at_byte_offset(), this function assumes the provided
byte offset is a valid offset into the UTF-8 character stream. This
avoids walking the stream from the start.
---
NOTE(review): the new function performs no checks of its own. The
remaining length is computed as `m_string.length() - byte_offset` using
unsigned arithmetic, so callers must pass a byte_offset that does not
exceed byte_length(); a larger value wraps around instead of failing.

 AK/Utf8View.cpp | 5 +++++
 AK/Utf8View.h   | 2 ++
 2 files changed, 7 insertions(+)

diff --git a/AK/Utf8View.cpp b/AK/Utf8View.cpp
index 7e7788a0ff..5ebfccedfd 100644
--- a/AK/Utf8View.cpp
+++ b/AK/Utf8View.cpp
@@ -24,6 +24,11 @@ Utf8CodePointIterator Utf8View::iterator_at_byte_offset(size_t byte_offset) cons
     return end();
 }
 
+Utf8CodePointIterator Utf8View::iterator_at_byte_offset_without_validation(size_t byte_offset) const
+{
+    return Utf8CodePointIterator { reinterpret_cast<u8 const*>(m_string.characters_without_null_termination()) + byte_offset, m_string.length() - byte_offset };
+}
+
 size_t Utf8View::byte_offset_of(Utf8CodePointIterator const& it) const
 {
     VERIFY(it.m_ptr >= begin_ptr());
diff --git a/AK/Utf8View.h b/AK/Utf8View.h
index 19dd62ff1d..6c84ce42ab 100644
--- a/AK/Utf8View.h
+++ b/AK/Utf8View.h
@@ -80,6 +80,8 @@ public:
     Utf8CodePointIterator end() const { return { end_ptr(), 0 }; }
     Utf8CodePointIterator iterator_at_byte_offset(size_t) const;
 
+    Utf8CodePointIterator iterator_at_byte_offset_without_validation(size_t) const;
+
     unsigned char const* bytes() const { return begin_ptr(); }
     size_t byte_length() const { return m_string.length(); }
     size_t byte_offset_of(Utf8CodePointIterator const&) const;