From 55197ed4ef1ab39ede3598bc20d074023f9ec8d7 Mon Sep 17 00:00:00 2001 From: Sergey Bugaev Date: Wed, 4 Sep 2019 23:41:22 +0300 Subject: [PATCH] AK: Log UTF-8 validation errors --- AK/Utf8View.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/AK/Utf8View.cpp b/AK/Utf8View.cpp index 7e404522cd..ad54c164ed 100644 --- a/AK/Utf8View.cpp +++ b/AK/Utf8View.cpp @@ -1,4 +1,5 @@ #include <AK/Utf8View.h> +#include <AK/LogStream.h> namespace AK { @@ -134,7 +135,13 @@ u32 Utf8CodepointIterator::operator*() const int codepoint_length_in_bytes; bool first_byte_makes_sense = decode_first_byte(m_ptr[0], codepoint_length_in_bytes, codepoint_value_so_far); + if (!first_byte_makes_sense) { + dbg() << "First byte doesn't make sense, bytes = " << (const char*)m_ptr; + } ASSERT(first_byte_makes_sense); + if (codepoint_length_in_bytes > m_length) { + dbg() << "Not enough bytes (need " << codepoint_length_in_bytes << ", have " << m_length << "), first byte is: " << m_ptr[0] << " " << (const char*)m_ptr; + } ASSERT(codepoint_length_in_bytes <= m_length); for (int offset = 1; offset < codepoint_length_in_bytes; offset++) {