// Review notes for this patch. They sit ahead of the first `diff --git` header, so they
// are not part of any hunk.
//
// Files touched: AK/Utf8View.cpp and AK/Utf8View.h. Both gain a
// "Copyright (c) 2021, Max Wipfli" line in their license header.
//
// AK/Utf8View.cpp adds
//     Utf8View Utf8View::unicode_substring_view(size_t codepoint_offset, size_t codepoint_length) const
//   - codepoint_length == 0 returns a default-constructed view (`return {};`) without iterating.
//   - Otherwise it iterates from begin() and counts code points in codepoint_index.
//   - When the index equals codepoint_offset, it records byte_offset_of(iterator) as the
//     start of the slice.
//   - When the index equals codepoint_offset + codepoint_length - 1 (the last requested
//     code point), it advances the iterator once more so byte_offset_of() gives the end of
//     the slice, then returns substring_view(offset_in_bytes, length_in_bytes).
//   - If the loop ends before that index is reached, VERIFY_NOT_REACHED() runs, so a range
//     that extends past the last code point is not handled by returning a shorter view.
//     NOTE(review): presumably callers must keep the range inside the view; confirm.
//
// AK/Utf8View.h
//   - Removes and re-adds the substring_view(byte_offset, byte_length) and is_empty()
//     declarations, with the new overloads between them.
//   - Adds substring_view(byte_offset), which forwards with
//     byte_length() - byte_offset as the length.
//   - Declares unicode_substring_view(codepoint_offset, codepoint_length).
//   - Adds unicode_substring_view(codepoint_offset), which forwards with
//     length() - codepoint_offset as the length.
//   NOTE(review): both one-argument overloads subtract the offset from the length without a
//   range check. byte_length() returns size_t, so an offset larger than the length would wrap;
//   the return type of length() is not visible here. Verify callers stay in range.
//
// NOTE(review): in this copy the whole patch sits on two physical lines; its line breaks
// appear to have been collapsed and would need restoring before the patch can be applied.
diff --git a/AK/Utf8View.cpp b/AK/Utf8View.cpp index 53276017c9..02944a42a8 100644 --- a/AK/Utf8View.cpp +++ b/AK/Utf8View.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2019-2020, Sergey Bugaev + * Copyright (c) 2021, Max Wipfli * * SPDX-License-Identifier: BSD-2-Clause */ @@ -59,6 +60,25 @@ Utf8View Utf8View::substring_view(size_t byte_offset, size_t byte_length) const return Utf8View { string }; } +Utf8View Utf8View::unicode_substring_view(size_t codepoint_offset, size_t codepoint_length) const +{ + if (codepoint_length == 0) + return {}; + + size_t codepoint_index = 0, offset_in_bytes = 0; + for (auto iterator = begin(); !iterator.done(); ++iterator) { + if (codepoint_index == codepoint_offset) + offset_in_bytes = byte_offset_of(iterator); + if (codepoint_index == codepoint_offset + codepoint_length - 1) { + size_t length_in_bytes = byte_offset_of(++iterator) - offset_in_bytes; + return substring_view(offset_in_bytes, length_in_bytes); + } + ++codepoint_index; + } + + VERIFY_NOT_REACHED(); +} + static inline bool decode_first_byte( unsigned char byte, size_t& out_code_point_length_in_bytes, diff --git a/AK/Utf8View.h b/AK/Utf8View.h index 8fff7990e6..d9239e6b62 100644 --- a/AK/Utf8View.h +++ b/AK/Utf8View.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2019-2020, Sergey Bugaev + * Copyright (c) 2021, Max Wipfli * * SPDX-License-Identifier: BSD-2-Clause */ @@ -57,9 +58,13 @@ public: const unsigned char* bytes() const { return begin_ptr(); } size_t byte_length() const { return m_string.length(); } size_t byte_offset_of(const Utf8CodepointIterator&) const; - Utf8View substring_view(size_t byte_offset, size_t byte_length) const; - bool is_empty() const { return m_string.is_empty(); } + Utf8View substring_view(size_t byte_offset, size_t byte_length) const; + Utf8View substring_view(size_t byte_offset) const { return substring_view(byte_offset, byte_length() - byte_offset); } + Utf8View unicode_substring_view(size_t codepoint_offset, size_t codepoint_length) const; + Utf8View 
unicode_substring_view(size_t codepoint_offset) const { return unicode_substring_view(codepoint_offset, length() - codepoint_offset); } + + bool is_empty() const { return m_string.is_empty(); } bool starts_with(const Utf8View&) const; size_t iterator_offset(const Utf8CodepointIterator& it) const