From edecf8f6a375b04e60e7c4ddcc10b40f350a5d77 Mon Sep 17 00:00:00 2001 From: Idan Horowitz Date: Sun, 21 Mar 2021 22:31:15 +0200 Subject: [PATCH] AK: Add starts_with to Utf8View Unlike String/StringView::starts_with this compares utf8 code points instead of "characters" (bytes), which is important when handling arbitrary utf-8 input that could include overlong characters. --- AK/Utf8View.cpp | 18 ++++++++++++++++++ AK/Utf8View.h | 2 ++ 2 files changed, 20 insertions(+) diff --git a/AK/Utf8View.cpp b/AK/Utf8View.cpp index d28be5e425..a6fba541f2 100644 --- a/AK/Utf8View.cpp +++ b/AK/Utf8View.cpp @@ -144,6 +144,24 @@ size_t Utf8View::calculate_length() const return length; } +bool Utf8View::starts_with(const Utf8View& start) const +{ + if (start.is_empty()) + return true; + if (is_empty()) + return false; + if (start.length() > length()) + return false; + if (begin_ptr() == start.begin_ptr()) + return true; + + for (auto k = begin(), l = start.begin(); l != start.end(); ++k, ++l) { + if (*k != *l) + return false; + } + return true; +} + Utf8CodepointIterator::Utf8CodepointIterator(const unsigned char* ptr, size_t length) : m_ptr(ptr) , m_length(length) diff --git a/AK/Utf8View.h b/AK/Utf8View.h index af21c6792c..fe3b84029c 100644 --- a/AK/Utf8View.h +++ b/AK/Utf8View.h @@ -80,6 +80,8 @@ public: Utf8View substring_view(int byte_offset, int byte_length) const; bool is_empty() const { return m_string.is_empty(); } + bool starts_with(const Utf8View&) const; + size_t iterator_offset(const Utf8CodepointIterator& it) const { return byte_offset_of(it);