mirror of
				https://github.com/RGBCube/serenity
				synced 2025-10-31 21:42:43 +00:00 
			
		
		
		
	LibJS+AK: Use Vector<u16, 1> for UTF-16 string storage
It's very common to encounter single-character strings in JavaScript on the web. We can make such strings significantly lighter by giving the Vectors that store them an inline capacity of one UTF-16 code unit.
This commit is contained in:
		
							parent
							
								
									ae0bdda86e
								
							
						
					
					
						commit
						024367d82e
					
				
					 7 changed files with 27 additions and 26 deletions
				
			
		|  | @ -21,9 +21,10 @@ static constexpr u32 replacement_code_point = 0xfffd; | ||||||
| static constexpr u32 first_supplementary_plane_code_point = 0x10000; | static constexpr u32 first_supplementary_plane_code_point = 0x10000; | ||||||
| 
 | 
 | ||||||
| template<typename UtfViewType> | template<typename UtfViewType> | ||||||
| static Vector<u16> to_utf16_impl(UtfViewType const& view) requires(IsSame<UtfViewType, Utf8View> || IsSame<UtfViewType, Utf32View>) | static Vector<u16, 1> to_utf16_impl(UtfViewType const& view) requires(IsSame<UtfViewType, Utf8View> || IsSame<UtfViewType, Utf32View>) | ||||||
| { | { | ||||||
|     Vector<u16> utf16_data; |     Vector<u16, 1> utf16_data; | ||||||
|  |     utf16_data.ensure_capacity(view.length()); | ||||||
| 
 | 
 | ||||||
|     for (auto code_point : view) |     for (auto code_point : view) | ||||||
|         code_point_to_utf16(utf16_data, code_point); |         code_point_to_utf16(utf16_data, code_point); | ||||||
|  | @ -31,22 +32,22 @@ static Vector<u16> to_utf16_impl(UtfViewType const& view) requires(IsSame<UtfVie | ||||||
|     return utf16_data; |     return utf16_data; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| Vector<u16> utf8_to_utf16(StringView const& utf8_view) | Vector<u16, 1> utf8_to_utf16(StringView const& utf8_view) | ||||||
| { | { | ||||||
|     return to_utf16_impl(Utf8View { utf8_view }); |     return to_utf16_impl(Utf8View { utf8_view }); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| Vector<u16> utf8_to_utf16(Utf8View const& utf8_view) | Vector<u16, 1> utf8_to_utf16(Utf8View const& utf8_view) | ||||||
| { | { | ||||||
|     return to_utf16_impl(utf8_view); |     return to_utf16_impl(utf8_view); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| Vector<u16> utf32_to_utf16(Utf32View const& utf32_view) | Vector<u16, 1> utf32_to_utf16(Utf32View const& utf32_view) | ||||||
| { | { | ||||||
|     return to_utf16_impl(utf32_view); |     return to_utf16_impl(utf32_view); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void code_point_to_utf16(Vector<u16>& string, u32 code_point) | void code_point_to_utf16(Vector<u16, 1>& string, u32 code_point) | ||||||
| { | { | ||||||
|     VERIFY(is_unicode(code_point)); |     VERIFY(is_unicode(code_point)); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -16,10 +16,10 @@ | ||||||
| 
 | 
 | ||||||
| namespace AK { | namespace AK { | ||||||
| 
 | 
 | ||||||
| Vector<u16> utf8_to_utf16(StringView const&); | Vector<u16, 1> utf8_to_utf16(StringView const&); | ||||||
| Vector<u16> utf8_to_utf16(Utf8View const&); | Vector<u16, 1> utf8_to_utf16(Utf8View const&); | ||||||
| Vector<u16> utf32_to_utf16(Utf32View const&); | Vector<u16, 1> utf32_to_utf16(Utf32View const&); | ||||||
| void code_point_to_utf16(Vector<u16>&, u32); | void code_point_to_utf16(Vector<u16, 1>&, u32); | ||||||
| 
 | 
 | ||||||
| class Utf16View; | class Utf16View; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -140,7 +140,7 @@ JS_DEFINE_NATIVE_FUNCTION(StringConstructor::from_char_code) | ||||||
| // 22.1.2.2 String.fromCodePoint ( ...codePoints ), https://tc39.es/ecma262/#sec-string.fromcodepoint
 | // 22.1.2.2 String.fromCodePoint ( ...codePoints ), https://tc39.es/ecma262/#sec-string.fromcodepoint
 | ||||||
| JS_DEFINE_NATIVE_FUNCTION(StringConstructor::from_code_point) | JS_DEFINE_NATIVE_FUNCTION(StringConstructor::from_code_point) | ||||||
| { | { | ||||||
|     Vector<u16> string; |     Vector<u16, 1> string; | ||||||
|     string.ensure_capacity(vm.argument_count()); // This will be an under-estimate if any code point is > 0xffff.
 |     string.ensure_capacity(vm.argument_count()); // This will be an under-estimate if any code point is > 0xffff.
 | ||||||
| 
 | 
 | ||||||
|     for (size_t i = 0; i < vm.argument_count(); ++i) { |     for (size_t i = 0; i < vm.argument_count(); ++i) { | ||||||
|  |  | ||||||
|  | @ -17,7 +17,7 @@ static NonnullRefPtr<Utf16StringImpl> the_empty_utf16_string() | ||||||
|     return empty_string; |     return empty_string; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| Utf16StringImpl::Utf16StringImpl(Vector<u16> string) | Utf16StringImpl::Utf16StringImpl(Vector<u16, 1> string) | ||||||
|     : m_string(move(string)) |     : m_string(move(string)) | ||||||
| { | { | ||||||
| } | } | ||||||
|  | @ -27,7 +27,7 @@ NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create() | ||||||
|     return adopt_ref(*new Utf16StringImpl()); |     return adopt_ref(*new Utf16StringImpl()); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(Vector<u16> string) | NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(Vector<u16, 1> string) | ||||||
| { | { | ||||||
|     return adopt_ref(*new Utf16StringImpl(move(string))); |     return adopt_ref(*new Utf16StringImpl(move(string))); | ||||||
| } | } | ||||||
|  | @ -39,13 +39,13 @@ NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(StringView const& string) | ||||||
| 
 | 
 | ||||||
| NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(Utf16View const& view) | NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(Utf16View const& view) | ||||||
| { | { | ||||||
|     Vector<u16> string; |     Vector<u16, 1> string; | ||||||
|     string.ensure_capacity(view.length_in_code_units()); |     string.ensure_capacity(view.length_in_code_units()); | ||||||
|     string.append(view.data(), view.length_in_code_units()); |     string.append(view.data(), view.length_in_code_units()); | ||||||
|     return create(move(string)); |     return create(move(string)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| Vector<u16> const& Utf16StringImpl::string() const | Vector<u16, 1> const& Utf16StringImpl::string() const | ||||||
| { | { | ||||||
|     return m_string; |     return m_string; | ||||||
| } | } | ||||||
|  | @ -62,7 +62,7 @@ Utf16String::Utf16String() | ||||||
| { | { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| Utf16String::Utf16String(Vector<u16> string) | Utf16String::Utf16String(Vector<u16, 1> string) | ||||||
|     : m_string(Detail::Utf16StringImpl::create(move(string))) |     : m_string(Detail::Utf16StringImpl::create(move(string))) | ||||||
| { | { | ||||||
| } | } | ||||||
|  | @ -77,7 +77,7 @@ Utf16String::Utf16String(Utf16View const& string) | ||||||
| { | { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| Vector<u16> const& Utf16String::string() const | Vector<u16, 1> const& Utf16String::string() const | ||||||
| { | { | ||||||
|     return m_string->string(); |     return m_string->string(); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -20,18 +20,18 @@ public: | ||||||
|     ~Utf16StringImpl() = default; |     ~Utf16StringImpl() = default; | ||||||
| 
 | 
 | ||||||
|     static NonnullRefPtr<Utf16StringImpl> create(); |     static NonnullRefPtr<Utf16StringImpl> create(); | ||||||
|     static NonnullRefPtr<Utf16StringImpl> create(Vector<u16>); |     static NonnullRefPtr<Utf16StringImpl> create(Vector<u16, 1>); | ||||||
|     static NonnullRefPtr<Utf16StringImpl> create(StringView const&); |     static NonnullRefPtr<Utf16StringImpl> create(StringView const&); | ||||||
|     static NonnullRefPtr<Utf16StringImpl> create(Utf16View const&); |     static NonnullRefPtr<Utf16StringImpl> create(Utf16View const&); | ||||||
| 
 | 
 | ||||||
|     Vector<u16> const& string() const; |     Vector<u16, 1> const& string() const; | ||||||
|     Utf16View view() const; |     Utf16View view() const; | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     Utf16StringImpl() = default; |     Utf16StringImpl() = default; | ||||||
|     explicit Utf16StringImpl(Vector<u16> string); |     explicit Utf16StringImpl(Vector<u16, 1> string); | ||||||
| 
 | 
 | ||||||
|     Vector<u16> m_string; |     Vector<u16, 1> m_string; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| } | } | ||||||
|  | @ -39,11 +39,11 @@ private: | ||||||
| class Utf16String { | class Utf16String { | ||||||
| public: | public: | ||||||
|     Utf16String(); |     Utf16String(); | ||||||
|     explicit Utf16String(Vector<u16>); |     explicit Utf16String(Vector<u16, 1>); | ||||||
|     explicit Utf16String(StringView const&); |     explicit Utf16String(StringView const&); | ||||||
|     explicit Utf16String(Utf16View const&); |     explicit Utf16String(Utf16View const&); | ||||||
| 
 | 
 | ||||||
|     Vector<u16> const& string() const; |     Vector<u16, 1> const& string() const; | ||||||
|     Utf16View view() const; |     Utf16View view() const; | ||||||
|     Utf16View substring_view(size_t code_unit_offset, size_t code_unit_length) const; |     Utf16View substring_view(size_t code_unit_offset, size_t code_unit_length) const; | ||||||
|     Utf16View substring_view(size_t code_unit_offset) const; |     Utf16View substring_view(size_t code_unit_offset) const; | ||||||
|  |  | ||||||
|  | @ -470,7 +470,7 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(MatchInput const& input, M | ||||||
|                 return ExecutionResult::Failed_ExecuteLowPrioForks; |                 return ExecutionResult::Failed_ExecuteLowPrioForks; | ||||||
| 
 | 
 | ||||||
|             Optional<String> str; |             Optional<String> str; | ||||||
|             Vector<u16> utf16; |             Vector<u16, 1> utf16; | ||||||
|             Vector<u32> data; |             Vector<u32> data; | ||||||
|             data.ensure_capacity(length); |             data.ensure_capacity(length); | ||||||
|             for (size_t i = offset; i < offset + length; ++i) |             for (size_t i = offset; i < offset + length; ++i) | ||||||
|  | @ -557,7 +557,7 @@ ALWAYS_INLINE void OpCode_Compare::compare_char(MatchInput const& input, MatchSt | ||||||
| 
 | 
 | ||||||
|     auto input_view = input.view.substring_view(state.string_position, 1); |     auto input_view = input.view.substring_view(state.string_position, 1); | ||||||
|     Optional<String> str; |     Optional<String> str; | ||||||
|     Vector<u16> utf16; |     Vector<u16, 1> utf16; | ||||||
|     auto compare_view = input_view.construct_as_same({ &ch1, 1 }, str, utf16); |     auto compare_view = input_view.construct_as_same({ &ch1, 1 }, str, utf16); | ||||||
|     bool equal; |     bool equal; | ||||||
|     if (input.regex_options & AllFlags::Insensitive) |     if (input.regex_options & AllFlags::Insensitive) | ||||||
|  |  | ||||||
|  | @ -139,7 +139,7 @@ public: | ||||||
|         return view; |         return view; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     RegexStringView construct_as_same(Span<u32> data, Optional<String>& optional_string_storage, Vector<u16>& optional_utf16_storage) const |     RegexStringView construct_as_same(Span<u32> data, Optional<String>& optional_string_storage, Vector<u16, 1>& optional_utf16_storage) const | ||||||
|     { |     { | ||||||
|         auto view = m_view.visit( |         auto view = m_view.visit( | ||||||
|             [&]<typename T>(T const&) { |             [&]<typename T>(T const&) { | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Andreas Kling
						Andreas Kling