mirror of
				https://github.com/RGBCube/serenity
				synced 2025-10-31 15:12:45 +00:00 
			
		
		
		
	AK: Ensure short String instances are valid UTF-8
We are currently only validating long strings.
This commit is contained in:
		
							parent
							
								
									434ca78425
								
							
						
					
					
						commit
						da0d000909
					
				
					 3 changed files with 37 additions and 15 deletions
				
			
		|  | @ -11,7 +11,6 @@ | ||||||
| #include <AK/MemMem.h> | #include <AK/MemMem.h> | ||||||
| #include <AK/Stream.h> | #include <AK/Stream.h> | ||||||
| #include <AK/String.h> | #include <AK/String.h> | ||||||
| #include <AK/Utf8View.h> |  | ||||||
| #include <AK/Vector.h> | #include <AK/Vector.h> | ||||||
| #include <stdlib.h> | #include <stdlib.h> | ||||||
| 
 | 
 | ||||||
|  | @ -132,10 +131,6 @@ ErrorOr<NonnullRefPtr<StringData>> StringData::from_utf8(char const* utf8_data, | ||||||
|     // Strings of MAX_SHORT_STRING_BYTE_COUNT bytes or less should be handled by the String short string optimization.
 |     // Strings of MAX_SHORT_STRING_BYTE_COUNT bytes or less should be handled by the String short string optimization.
 | ||||||
|     VERIFY(byte_count > String::MAX_SHORT_STRING_BYTE_COUNT); |     VERIFY(byte_count > String::MAX_SHORT_STRING_BYTE_COUNT); | ||||||
| 
 | 
 | ||||||
|     Utf8View view(StringView(utf8_data, byte_count)); |  | ||||||
|     if (!view.validate()) |  | ||||||
|         return Error::from_string_literal("StringData::from_utf8: Input was not valid UTF-8"); |  | ||||||
| 
 |  | ||||||
|     VERIFY(utf8_data); |     VERIFY(utf8_data); | ||||||
|     u8* buffer = nullptr; |     u8* buffer = nullptr; | ||||||
|     auto new_string_data = TRY(create_uninitialized(byte_count, buffer)); |     auto new_string_data = TRY(create_uninitialized(byte_count, buffer)); | ||||||
|  | @ -143,6 +138,16 @@ ErrorOr<NonnullRefPtr<StringData>> StringData::from_utf8(char const* utf8_data, | ||||||
|     return new_string_data; |     return new_string_data; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static ErrorOr<void> read_stream_into_buffer(Stream& stream, Bytes buffer) | ||||||
|  | { | ||||||
|  |     TRY(stream.read_entire_buffer(buffer)); | ||||||
|  | 
 | ||||||
|  |     if (!Utf8View { StringView { buffer } }.validate()) | ||||||
|  |         return Error::from_string_literal("String::from_stream: Input was not valid UTF-8"); | ||||||
|  | 
 | ||||||
|  |     return {}; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| ErrorOr<NonnullRefPtr<StringData>> StringData::from_stream(Stream& stream, size_t byte_count) | ErrorOr<NonnullRefPtr<StringData>> StringData::from_stream(Stream& stream, size_t byte_count) | ||||||
| { | { | ||||||
|     // Strings of MAX_SHORT_STRING_BYTE_COUNT bytes or less should be handled by the String short string optimization.
 |     // Strings of MAX_SHORT_STRING_BYTE_COUNT bytes or less should be handled by the String short string optimization.
 | ||||||
|  | @ -150,12 +155,7 @@ ErrorOr<NonnullRefPtr<StringData>> StringData::from_stream(Stream& stream, size_ | ||||||
| 
 | 
 | ||||||
|     u8* buffer = nullptr; |     u8* buffer = nullptr; | ||||||
|     auto new_string_data = TRY(create_uninitialized(byte_count, buffer)); |     auto new_string_data = TRY(create_uninitialized(byte_count, buffer)); | ||||||
|     Bytes new_string_bytes = { buffer, byte_count }; |     TRY(read_stream_into_buffer(stream, { buffer, byte_count })); | ||||||
|     TRY(stream.read_entire_buffer(new_string_bytes)); |  | ||||||
| 
 |  | ||||||
|     Utf8View view(StringView { new_string_bytes }); |  | ||||||
|     if (!view.validate()) |  | ||||||
|         return Error::from_string_literal("StringData::from_stream: Input was not valid UTF-8"); |  | ||||||
| 
 | 
 | ||||||
|     return new_string_data; |     return new_string_data; | ||||||
| } | } | ||||||
|  | @ -230,6 +230,9 @@ void String::destroy_string() | ||||||
| 
 | 
 | ||||||
| ErrorOr<String> String::from_utf8(StringView view) | ErrorOr<String> String::from_utf8(StringView view) | ||||||
| { | { | ||||||
|  |     if (!Utf8View { view }.validate()) | ||||||
|  |         return Error::from_string_literal("String::from_utf8: Input was not valid UTF-8"); | ||||||
|  | 
 | ||||||
|     if (view.length() <= MAX_SHORT_STRING_BYTE_COUNT) { |     if (view.length() <= MAX_SHORT_STRING_BYTE_COUNT) { | ||||||
|         ShortString short_string; |         ShortString short_string; | ||||||
|         if (!view.is_empty()) |         if (!view.is_empty()) | ||||||
|  | @ -246,7 +249,7 @@ ErrorOr<String> String::from_stream(Stream& stream, size_t byte_count) | ||||||
|     if (byte_count <= MAX_SHORT_STRING_BYTE_COUNT) { |     if (byte_count <= MAX_SHORT_STRING_BYTE_COUNT) { | ||||||
|         ShortString short_string; |         ShortString short_string; | ||||||
|         if (byte_count > 0) |         if (byte_count > 0) | ||||||
|             TRY(stream.read_entire_buffer({ short_string.storage, byte_count })); |             TRY(Detail::read_stream_into_buffer(stream, { short_string.storage, byte_count })); | ||||||
|         short_string.byte_count_and_short_string_flag = (byte_count << 1) | SHORT_STRING_FLAG; |         short_string.byte_count_and_short_string_flag = (byte_count << 1) | SHORT_STRING_FLAG; | ||||||
|         return String { short_string }; |         return String { short_string }; | ||||||
|     } |     } | ||||||
|  | @ -587,9 +590,6 @@ DeprecatedString String::to_deprecated_string() const | ||||||
| 
 | 
 | ||||||
| ErrorOr<String> String::from_deprecated_string(DeprecatedString const& deprecated_string) | ErrorOr<String> String::from_deprecated_string(DeprecatedString const& deprecated_string) | ||||||
| { | { | ||||||
|     Utf8View view(deprecated_string); |  | ||||||
|     if (!view.validate()) |  | ||||||
|         return Error::from_string_literal("String::from_deprecated_string: Input was not valid UTF-8"); |  | ||||||
|     return String::from_utf8(deprecated_string.view()); |     return String::from_utf8(deprecated_string.view()); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -20,6 +20,7 @@ | ||||||
| #include <AK/Traits.h> | #include <AK/Traits.h> | ||||||
| #include <AK/Types.h> | #include <AK/Types.h> | ||||||
| #include <AK/UnicodeUtils.h> | #include <AK/UnicodeUtils.h> | ||||||
|  | #include <AK/Utf8View.h> | ||||||
| #include <AK/Vector.h> | #include <AK/Vector.h> | ||||||
| 
 | 
 | ||||||
| namespace AK { | namespace AK { | ||||||
|  | @ -72,6 +73,7 @@ public: | ||||||
|     static AK_SHORT_STRING_CONSTEVAL String from_utf8_short_string(StringView string) |     static AK_SHORT_STRING_CONSTEVAL String from_utf8_short_string(StringView string) | ||||||
|     { |     { | ||||||
|         VERIFY(string.length() <= MAX_SHORT_STRING_BYTE_COUNT); |         VERIFY(string.length() <= MAX_SHORT_STRING_BYTE_COUNT); | ||||||
|  |         VERIFY(Utf8View { string }.validate()); | ||||||
| 
 | 
 | ||||||
|         ShortString short_string; |         ShortString short_string; | ||||||
|         for (size_t i = 0; i < string.length(); ++i) |         for (size_t i = 0; i < string.length(); ++i) | ||||||
|  |  | ||||||
|  | @ -140,6 +140,26 @@ TEST_CASE(long_streams) | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | TEST_CASE(invalid_utf8) | ||||||
|  | { | ||||||
|  |     auto string1 = String::from_utf8("long string \xf4\x8f\xbf\xc0"sv); // Invalid: 0xC0 is not a continuation byte (U+10FFFF is F4 8F BF BF)
 | ||||||
|  |     EXPECT(string1.is_error()); | ||||||
|  |     EXPECT(string1.error().string_literal().contains("Input was not valid UTF-8"sv)); | ||||||
|  | 
 | ||||||
|  |     auto string2 = String::from_utf8("\xf4\xa1\xb0\xbd"sv); // U+121C3D
 | ||||||
|  |     EXPECT(string2.is_error()); | ||||||
|  |     EXPECT(string2.error().string_literal().contains("Input was not valid UTF-8"sv)); | ||||||
|  | 
 | ||||||
|  |     AllocatingMemoryStream stream; | ||||||
|  |     MUST(stream.write_value<u8>(0xf4)); | ||||||
|  |     MUST(stream.write_value<u8>(0xa1)); | ||||||
|  |     MUST(stream.write_value<u8>(0xb0)); | ||||||
|  |     MUST(stream.write_value<u8>(0xbd)); | ||||||
|  |     auto string3 = String::from_stream(stream, stream.used_buffer_size()); | ||||||
|  |     EXPECT_EQ(string3.is_error(), true); | ||||||
|  |     EXPECT(string3.error().string_literal().contains("Input was not valid UTF-8"sv)); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| TEST_CASE(from_code_points) | TEST_CASE(from_code_points) | ||||||
| { | { | ||||||
|     for (u32 code_point = 0; code_point < 0x80; ++code_point) { |     for (u32 code_point = 0; code_point < 0x80; ++code_point) { | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Timothy Flynn
						Timothy Flynn