1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-14 09:04:59 +00:00

LibTextCodec+AK: Don't validate UTF-8 strings twice

UTF8Decoder was already converting invalid data into replacement
characters while converting, so we know for sure we have valid UTF-8
by the time conversion is finished.

This patch adds a new StringBuilder::to_string_without_validation()
and uses it to make UTF8Decoder avoid half the work it was doing.
This commit is contained in:
Andreas Kling 2023-12-29 15:30:15 +01:00
parent a285e36041
commit 3c039903fb
5 changed files with 22 additions and 1 deletion

View file

@ -233,6 +233,19 @@ void String::destroy_string()
m_data->unref();
}
// Constructs a String directly from `bytes` without running any UTF-8
// validation on them. The caller is responsible for passing well-formed
// UTF-8; this function does not check.
//
// Inputs longer than MAX_SHORT_STRING_BYTE_COUNT are handed to
// Detail::StringData::from_utf8(); anything else is copied into an inline
// ShortString.
String String::from_utf8_without_validation(ReadonlyBytes bytes)
{
    auto const byte_count = bytes.size();

    // Too long for the inline representation: build a StringData instead.
    // This function returns a plain String (not ErrorOr), so a failure from
    // StringData::from_utf8() cannot be propagated and is wrapped in MUST.
    if (byte_count > MAX_SHORT_STRING_BYTE_COUNT) {
        auto string_data = MUST(Detail::StringData::from_utf8(reinterpret_cast<char const*>(bytes.data()), byte_count));
        return String { move(string_data) };
    }

    ShortString inline_string;

    // Skip the copy entirely for empty input so memcpy is never called with
    // whatever data() yields for an empty span.
    if (byte_count != 0)
        memcpy(inline_string.storage, bytes.data(), byte_count);

    // The length is stored shifted left by one bit, with SHORT_STRING_FLAG
    // or'ed into the same byte.
    inline_string.byte_count_and_short_string_flag = (byte_count << 1) | SHORT_STRING_FLAG;
    return String { inline_string };
}
ErrorOr<String> String::from_utf8(StringView view)
{
if (!Utf8View { view }.validate())