From f4e6dae6fec24df0f3ee721a964d091b62aa774e Mon Sep 17 00:00:00 2001
From: Andreas Kling
Date: Fri, 18 Oct 2019 22:49:23 +0200
Subject: [PATCH] UTF-8: Add Utf8CodepointIterator::codepoint_length_in_bytes()

This allows you to retrieve the length (in bytes) of the codepoint the
iterator is currently pointing at.
---
// Reviewer notes (this area sits between "---" and the diff, so it is not
// part of the commit message or of the applied change):
//
// * AK/Utf8View.cpp gains Utf8CodepointIterator::codepoint_length_in_bytes(),
//   a const member returning int. It:
//     1. asserts m_length > 0,
//     2. passes the byte at *m_ptr to decode_first_byte() together with two
//        out-arguments (an int length and a u32 value),
//     3. asserts that decode_first_byte() returned true,
//     4. returns the int length out-argument.
//   The u32 `value` out-argument is filled in but never read by this function.
//   Only *m_ptr is read here; no later bytes are inspected.
//
// * decode_first_byte() is defined outside this patch. Presumably it derives
//   the sequence length from the UTF-8 lead byte and returns false for an
//   invalid lead byte -- TODO confirm against its definition.
//
// * The two locals are declared without initializers; the returned length is
//   only meaningful because the second ASSERT stops execution when
//   decode_first_byte() fails. NOTE(review): this relies on ASSERT being
//   active in every build configuration -- confirm.
//
// * AK/Utf8View.h declares the new member immediately before the `private:`
//   label that follows `public:`, i.e. in the public section.
 AK/Utf8View.cpp | 10 ++++++++++
 AK/Utf8View.h   |  2 ++
 2 files changed, 12 insertions(+)

diff --git a/AK/Utf8View.cpp b/AK/Utf8View.cpp
index b63f674c9a..d5b73f4319 100644
--- a/AK/Utf8View.cpp
+++ b/AK/Utf8View.cpp
@@ -139,6 +139,16 @@ Utf8CodepointIterator& Utf8CodepointIterator::operator++()
     return *this;
 }
 
+int Utf8CodepointIterator::codepoint_length_in_bytes() const
+{
+    ASSERT(m_length > 0);
+    int codepoint_length_in_bytes;
+    u32 value;
+    bool first_byte_makes_sense = decode_first_byte(*m_ptr, codepoint_length_in_bytes, value);
+    ASSERT(first_byte_makes_sense);
+    return codepoint_length_in_bytes;
+}
+
 u32 Utf8CodepointIterator::operator*() const
 {
     ASSERT(m_length > 0);
diff --git a/AK/Utf8View.h b/AK/Utf8View.h
index 5db0e1b7ea..cbc61aeead 100644
--- a/AK/Utf8View.h
+++ b/AK/Utf8View.h
@@ -18,6 +18,8 @@ public:
     Utf8CodepointIterator& operator++();
     u32 operator*() const;
 
+    int codepoint_length_in_bytes() const;
+
 private:
     Utf8CodepointIterator(const unsigned char*, int);
     const unsigned char* m_ptr { nullptr };