1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-31 14:38:11 +00:00

LibTextCodec: Support validating encoded inputs

This commit is contained in:
Idan Horowitz 2023-11-17 15:08:44 +02:00 committed by Andreas Kling
parent ad4470bc39
commit 079c96376c
2 changed files with 55 additions and 0 deletions

View file

@ -232,6 +232,12 @@ StringView get_output_encoding(StringView encoding)
return encoding;
}
// Fallback validation shared by all decoders: every input sequence is reported as valid.
// Character encodings that cannot accept arbitrary input may override this with a real check.
bool Decoder::validate(StringView)
{
    return true;
}
ErrorOr<String> Decoder::to_utf8(StringView input)
{
StringBuilder builder(input.length());
@ -247,6 +253,11 @@ ErrorOr<void> UTF8Decoder::process(StringView input, Function<ErrorOr<void>(u32)
return {};
}
// Reports whether `input` passes UTF-8 validation, as decided by Utf8View::validate().
bool UTF8Decoder::validate(StringView input)
{
    Utf8View view(input);
    return view.validate();
}
ErrorOr<String> UTF8Decoder::to_utf8(StringView input)
{
// Discard the BOM
@ -299,6 +310,26 @@ ErrorOr<void> UTF16BEDecoder::process(StringView input, Function<ErrorOr<void>(u
return {};
}
// Checks that `input`, read as big-endian 16-bit code units, contains only well-paired surrogates:
// every surrogate code unit must be a high surrogate immediately followed by a low surrogate.
// If the input has an odd number of bytes, the final byte is not examined.
bool UTF16BEDecoder::validate(StringView input)
{
    // Assembles the big-endian code unit stored at bytes [offset, offset + 1].
    auto read_code_unit = [&](size_t offset) -> u16 {
        return static_cast<u16>((static_cast<u8>(input[offset]) << 8) | static_cast<u8>(input[offset + 1]));
    };

    // Only whole code units are considered; round the byte count down to an even number.
    size_t const usable_bytes = (input.length() / 2) * 2;

    size_t offset = 0;
    while (offset < usable_bytes) {
        u16 const lead = read_code_unit(offset);
        offset += 2;

        if (!is_unicode_surrogate(lead))
            continue;

        // A lone low surrogate, or a high surrogate with nothing after it, is malformed.
        if (!Utf16View::is_high_surrogate(lead) || offset == usable_bytes)
            return false;

        u16 const trail = read_code_unit(offset);
        if (!Utf16View::is_low_surrogate(trail))
            return false;

        // The trailing half of the pair has been consumed as well.
        offset += 2;
    }

    return true;
}
ErrorOr<String> UTF16BEDecoder::to_utf8(StringView input)
{
// Discard the BOM
@ -352,6 +383,26 @@ ErrorOr<void> UTF16LEDecoder::process(StringView input, Function<ErrorOr<void>(u
return {};
}
// Checks that `input`, read as little-endian 16-bit code units, contains only well-paired surrogates:
// every surrogate code unit must be a high surrogate immediately followed by a low surrogate.
// If the input has an odd number of bytes, the final byte is not examined.
bool UTF16LEDecoder::validate(StringView input)
{
    // Assembles the little-endian code unit stored at bytes [offset, offset + 1].
    auto read_code_unit = [&](size_t offset) -> u16 {
        return static_cast<u16>(static_cast<u8>(input[offset]) | (static_cast<u8>(input[offset + 1]) << 8));
    };

    // Only whole code units are considered; round the byte count down to an even number.
    size_t const usable_bytes = (input.length() / 2) * 2;

    size_t offset = 0;
    while (offset < usable_bytes) {
        u16 const lead = read_code_unit(offset);
        offset += 2;

        if (!is_unicode_surrogate(lead))
            continue;

        // A lone low surrogate, or a high surrogate with nothing after it, is malformed.
        if (!Utf16View::is_high_surrogate(lead) || offset == usable_bytes)
            return false;

        u16 const trail = read_code_unit(offset);
        if (!Utf16View::is_low_surrogate(trail))
            return false;

        // The trailing half of the pair has been consumed as well.
        offset += 2;
    }

    return true;
}
ErrorOr<String> UTF16LEDecoder::to_utf8(StringView input)
{
// Discard the BOM