Utf8View::validate(): check the decoded code point with is_unicode()

validate() now accumulates the code point while it walks the continuation
bytes (six payload bits per byte) and returns false when is_unicode()
rejects the decoded value. The locals are zero-initialised so the
accumulator starts from a defined value.

Test vectors:
  - invalid_utf8_5 is F4 90 80 80, the four-byte encoding of U+110000.
    An earlier revision of this patch used F4 8F BF C0; 0xC0 is not a
    continuation byte, so that input was rejected by the existing
    `*ptr >> 6 != 2` check and never exercised the is_unicode() check.
  - invalid_utf8_6 is F4 A1 B0 BD, which decodes to 0x121C3D.

NOTE(review): the #include targets in the first hunk are missing from this
copy of the patch. The added header is presumably the one that declares
is_unicode() -- restore all four targets before applying.
NOTE(review): the post-image blob hash on the TestUtf8.cpp index line
predates the invalid_utf8_5 correction.

diff --git a/AK/Utf8View.cpp b/AK/Utf8View.cpp
index 7d93d73b78..effc7aae98 100644
--- a/AK/Utf8View.cpp
+++ b/AK/Utf8View.cpp
@@ -6,6 +6,7 @@
  */
 
 #include
+#include
 #include
 #include
 
@@ -100,9 +101,9 @@ bool Utf8View::validate(size_t& valid_bytes) const
 {
     valid_bytes = 0;
     for (auto ptr = begin_ptr(); ptr < end_ptr(); ptr++) {
-        size_t code_point_length_in_bytes;
-        u32 value;
-        bool first_byte_makes_sense = decode_first_byte(*ptr, code_point_length_in_bytes, value);
+        size_t code_point_length_in_bytes = 0;
+        u32 code_point = 0;
+        bool first_byte_makes_sense = decode_first_byte(*ptr, code_point_length_in_bytes, code_point);
         if (!first_byte_makes_sense)
             return false;
 
@@ -112,8 +113,14 @@ bool Utf8View::validate(size_t& valid_bytes) const
                 return false;
             if (*ptr >> 6 != 2)
                 return false;
+
+            code_point <<= 6;
+            code_point |= *ptr & 63;
         }
 
+        if (!is_unicode(code_point))
+            return false;
+
         valid_bytes += code_point_length_in_bytes;
     }
 
diff --git a/Tests/AK/TestUtf8.cpp b/Tests/AK/TestUtf8.cpp
index 7139c06aca..f8e4867258 100644
--- a/Tests/AK/TestUtf8.cpp
+++ b/Tests/AK/TestUtf8.cpp
@@ -70,6 +70,16 @@ TEST_CASE(validate_invalid_ut8)
     Utf8View utf8_4 { StringView { invalid_utf8_4 } };
     EXPECT(!utf8_4.validate(valid_bytes));
     EXPECT(valid_bytes == 0);
+
+    char invalid_utf8_5[] = { (char)0xf4, (char)0x90, (char)0x80, (char)0x80, 0 }; // U+110000
+    Utf8View utf8_5 { StringView { invalid_utf8_5 } };
+    EXPECT(!utf8_5.validate(valid_bytes));
+    EXPECT(valid_bytes == 0);
+
+    char invalid_utf8_6[] = { (char)0xf4, (char)0xa1, (char)0xb0, (char)0xbd, 0 }; // U+121c3d
+    Utf8View utf8_6 { StringView { invalid_utf8_6 } };
+    EXPECT(!utf8_6.validate(valid_bytes));
+    EXPECT(valid_bytes == 0);
 }
 
 TEST_CASE(iterate_utf8)