mirror of
https://github.com/RGBCube/serenity
synced 2025-07-25 15:47:44 +00:00
LibTextCodec: Support validating encoded inputs
This commit is contained in:
parent
ad4470bc39
commit
079c96376c
2 changed files with 55 additions and 0 deletions
|
@ -232,6 +232,12 @@ StringView get_output_encoding(StringView encoding)
|
||||||
return encoding;
|
return encoding;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Base-class validation hook: every input is reported as valid.
// Decoders for encodings that do not accept all inputs may override this.
bool Decoder::validate(StringView)
{
    return true;
}
|
||||||
|
|
||||||
ErrorOr<String> Decoder::to_utf8(StringView input)
|
ErrorOr<String> Decoder::to_utf8(StringView input)
|
||||||
{
|
{
|
||||||
StringBuilder builder(input.length());
|
StringBuilder builder(input.length());
|
||||||
|
@ -247,6 +253,11 @@ ErrorOr<void> UTF8Decoder::process(StringView input, Function<ErrorOr<void>(u32)
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reports whether `input` passes Utf8View's validation.
bool UTF8Decoder::validate(StringView input)
{
    // Wrap the bytes in a view and let it perform the actual check.
    Utf8View const view(input);
    return view.validate();
}
|
||||||
|
|
||||||
ErrorOr<String> UTF8Decoder::to_utf8(StringView input)
|
ErrorOr<String> UTF8Decoder::to_utf8(StringView input)
|
||||||
{
|
{
|
||||||
// Discard the BOM
|
// Discard the BOM
|
||||||
|
@ -299,6 +310,26 @@ ErrorOr<void> UTF16BEDecoder::process(StringView input, Function<ErrorOr<void>(u
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks the surrogate structure of `input`, read as 16-bit code units with the most significant byte first.
// Returns false for a surrogate that is not a high surrogate, for a high surrogate in the last code unit,
// and for a high surrogate whose following code unit is not a low surrogate. Returns true otherwise.
// If the byte count is odd, the final byte is never inspected.
bool UTF16BEDecoder::validate(StringView input)
{
    // Round the byte count down to a whole number of 16-bit code units.
    size_t const paired_byte_count = (input.length() / 2) * 2;

    // Assembles the code unit that starts at `offset`, most significant byte first.
    auto code_unit_at = [&input](size_t offset) -> u16 {
        auto const msb = static_cast<u8>(input[offset]);
        auto const lsb = static_cast<u8>(input[offset + 1]);
        return static_cast<u16>((msb << 8) | lsb);
    };

    size_t offset = 0;
    while (offset < paired_byte_count) {
        u16 const lead = code_unit_at(offset);

        if (is_unicode_surrogate(lead)) {
            // A surrogate seen here must be a high surrogate with a code unit after it.
            bool const has_following_unit = offset + 2 != paired_byte_count;
            if (!(Utf16View::is_high_surrogate(lead) && has_following_unit))
                return false;

            // ...and that following code unit must be a low surrogate.
            if (!Utf16View::is_low_surrogate(code_unit_at(offset + 2)))
                return false;

            // Step over the low surrogate as well.
            offset += 2;
        }

        offset += 2;
    }

    return true;
}
|
||||||
|
|
||||||
ErrorOr<String> UTF16BEDecoder::to_utf8(StringView input)
|
ErrorOr<String> UTF16BEDecoder::to_utf8(StringView input)
|
||||||
{
|
{
|
||||||
// Discard the BOM
|
// Discard the BOM
|
||||||
|
@ -352,6 +383,26 @@ ErrorOr<void> UTF16LEDecoder::process(StringView input, Function<ErrorOr<void>(u
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks the surrogate structure of `input`, read as 16-bit code units with the least significant byte first.
// Returns false for a surrogate that is not a high surrogate, for a high surrogate in the last code unit,
// and for a high surrogate whose following code unit is not a low surrogate. Returns true otherwise.
// If the byte count is odd, the final byte is never inspected.
bool UTF16LEDecoder::validate(StringView input)
{
    // Round the byte count down to a whole number of 16-bit code units.
    size_t const paired_byte_count = (input.length() / 2) * 2;

    // Assembles the code unit that starts at `offset`, least significant byte first.
    auto code_unit_at = [&input](size_t offset) -> u16 {
        auto const lsb = static_cast<u8>(input[offset]);
        auto const msb = static_cast<u8>(input[offset + 1]);
        return static_cast<u16>(lsb | (msb << 8));
    };

    size_t offset = 0;
    while (offset < paired_byte_count) {
        u16 const lead = code_unit_at(offset);

        if (is_unicode_surrogate(lead)) {
            // A surrogate seen here must be a high surrogate with a code unit after it.
            bool const has_following_unit = offset + 2 != paired_byte_count;
            if (!(Utf16View::is_high_surrogate(lead) && has_following_unit))
                return false;

            // ...and that following code unit must be a low surrogate.
            if (!Utf16View::is_low_surrogate(code_unit_at(offset + 2)))
                return false;

            // Step over the low surrogate as well.
            offset += 2;
        }

        offset += 2;
    }

    return true;
}
|
||||||
|
|
||||||
ErrorOr<String> UTF16LEDecoder::to_utf8(StringView input)
|
ErrorOr<String> UTF16LEDecoder::to_utf8(StringView input)
|
||||||
{
|
{
|
||||||
// Discard the BOM
|
// Discard the BOM
|
||||||
|
|
|
@ -18,6 +18,7 @@ namespace TextCodec {
|
||||||
class Decoder {
|
class Decoder {
|
||||||
public:
|
public:
|
||||||
virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) = 0;
|
virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) = 0;
|
||||||
|
virtual bool validate(StringView);
|
||||||
virtual ErrorOr<String> to_utf8(StringView);
|
virtual ErrorOr<String> to_utf8(StringView);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
|
@ -27,18 +28,21 @@ protected:
|
||||||
class UTF8Decoder final : public Decoder {
|
class UTF8Decoder final : public Decoder {
|
||||||
public:
|
public:
|
||||||
virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
|
virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
|
||||||
|
virtual bool validate(StringView) override;
|
||||||
virtual ErrorOr<String> to_utf8(StringView) override;
|
virtual ErrorOr<String> to_utf8(StringView) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
class UTF16BEDecoder final : public Decoder {
|
class UTF16BEDecoder final : public Decoder {
|
||||||
public:
|
public:
|
||||||
virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
|
virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
|
||||||
|
virtual bool validate(StringView) override;
|
||||||
virtual ErrorOr<String> to_utf8(StringView) override;
|
virtual ErrorOr<String> to_utf8(StringView) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
class UTF16LEDecoder final : public Decoder {
|
class UTF16LEDecoder final : public Decoder {
|
||||||
public:
|
public:
|
||||||
virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
|
virtual ErrorOr<void> process(StringView, Function<ErrorOr<void>(u32)> on_code_point) override;
|
||||||
|
virtual bool validate(StringView) override;
|
||||||
virtual ErrorOr<String> to_utf8(StringView) override;
|
virtual ErrorOr<String> to_utf8(StringView) override;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue