1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-14 08:04:57 +00:00

AK: Ensure short String instances are valid UTF-8

We are currently only validating long strings.
This commit is contained in:
Timothy Flynn 2023-03-03 09:03:45 -05:00 committed by Tim Flynn
parent 434ca78425
commit da0d000909
3 changed files with 37 additions and 15 deletions

View file

@ -11,7 +11,6 @@
#include <AK/MemMem.h>
#include <AK/Stream.h>
#include <AK/String.h>
#include <AK/Utf8View.h>
#include <AK/Vector.h>
#include <stdlib.h>
@ -132,10 +131,6 @@ ErrorOr<NonnullRefPtr<StringData>> StringData::from_utf8(char const* utf8_data,
// Strings of MAX_SHORT_STRING_BYTE_COUNT bytes or less should be handled by the String short string optimization.
VERIFY(byte_count > String::MAX_SHORT_STRING_BYTE_COUNT);
Utf8View view(StringView(utf8_data, byte_count));
if (!view.validate())
return Error::from_string_literal("StringData::from_utf8: Input was not valid UTF-8");
VERIFY(utf8_data);
u8* buffer = nullptr;
auto new_string_data = TRY(create_uninitialized(byte_count, buffer));
@ -143,6 +138,16 @@ ErrorOr<NonnullRefPtr<StringData>> StringData::from_utf8(char const* utf8_data,
return new_string_data;
}
static ErrorOr<void> read_stream_into_buffer(Stream& stream, Bytes buffer)
{
TRY(stream.read_entire_buffer(buffer));
if (!Utf8View { StringView { buffer } }.validate())
return Error::from_string_literal("String::from_stream: Input was not valid UTF-8");
return {};
}
ErrorOr<NonnullRefPtr<StringData>> StringData::from_stream(Stream& stream, size_t byte_count)
{
// Strings of MAX_SHORT_STRING_BYTE_COUNT bytes or less should be handled by the String short string optimization.
@ -150,12 +155,7 @@ ErrorOr<NonnullRefPtr<StringData>> StringData::from_stream(Stream& stream, size_
u8* buffer = nullptr;
auto new_string_data = TRY(create_uninitialized(byte_count, buffer));
Bytes new_string_bytes = { buffer, byte_count };
TRY(stream.read_entire_buffer(new_string_bytes));
Utf8View view(StringView { new_string_bytes });
if (!view.validate())
return Error::from_string_literal("StringData::from_stream: Input was not valid UTF-8");
TRY(read_stream_into_buffer(stream, { buffer, byte_count }));
return new_string_data;
}
@ -230,6 +230,9 @@ void String::destroy_string()
ErrorOr<String> String::from_utf8(StringView view)
{
if (!Utf8View { view }.validate())
return Error::from_string_literal("String::from_utf8: Input was not valid UTF-8");
if (view.length() <= MAX_SHORT_STRING_BYTE_COUNT) {
ShortString short_string;
if (!view.is_empty())
@ -246,7 +249,7 @@ ErrorOr<String> String::from_stream(Stream& stream, size_t byte_count)
if (byte_count <= MAX_SHORT_STRING_BYTE_COUNT) {
ShortString short_string;
if (byte_count > 0)
TRY(stream.read_entire_buffer({ short_string.storage, byte_count }));
TRY(Detail::read_stream_into_buffer(stream, { short_string.storage, byte_count }));
short_string.byte_count_and_short_string_flag = (byte_count << 1) | SHORT_STRING_FLAG;
return String { short_string };
}
@ -587,9 +590,6 @@ DeprecatedString String::to_deprecated_string() const
ErrorOr<String> String::from_deprecated_string(DeprecatedString const& deprecated_string)
{
Utf8View view(deprecated_string);
if (!view.validate())
return Error::from_string_literal("String::from_deprecated_string: Input was not valid UTF-8");
return String::from_utf8(deprecated_string.view());
}