mirror of
				https://github.com/RGBCube/serenity
				synced 2025-10-31 22:02:44 +00:00 
			
		
		
		
	AK+LibJS: Implement String.from{CharCode,CodePoint} using UTF-16 strings
Most of String.prototype and RegExp.prototype is implemented with UTF-16, so building these strings directly as UTF-16 prevents extra copying of the string data.
This commit is contained in:
		
							parent
							
								
									b6ff7f4fcc
								
							
						
					
					
						commit
						70080feab2
					
				
					 3 changed files with 31 additions and 18 deletions
				
			
		|  | @ -25,15 +25,8 @@ static Vector<u16> to_utf16_impl(UtfViewType const& view) requires(IsSame<UtfVie | |||
| { | ||||
|     Vector<u16> utf16_data; | ||||
| 
 | ||||
|     for (auto code_point : view) { | ||||
|         if (code_point < first_supplementary_plane_code_point) { | ||||
|             utf16_data.append(static_cast<u16>(code_point)); | ||||
|         } else { | ||||
|             code_point -= first_supplementary_plane_code_point; | ||||
|             utf16_data.append(static_cast<u16>(high_surrogate_min | (code_point >> 10))); | ||||
|             utf16_data.append(static_cast<u16>(low_surrogate_min | (code_point & 0x3ff))); | ||||
|         } | ||||
|     } | ||||
|     for (auto code_point : view) | ||||
|         code_point_to_utf16(utf16_data, code_point); | ||||
| 
 | ||||
|     return utf16_data; | ||||
| } | ||||
|  | @ -53,6 +46,19 @@ Vector<u16> utf32_to_utf16(Utf32View const& utf32_view) | |||
|     return to_utf16_impl(utf32_view); | ||||
| } | ||||
| 
 | ||||
// Appends the UTF-16 encoding of `code_point` to the end of `string`.
//
// The code point must satisfy is_unicode(); this is enforced with VERIFY.
// A code point below first_supplementary_plane_code_point is appended as a
// single code unit. Any other code point has first_supplementary_plane_code_point
// subtracted from it and is appended as two code units: the bits above the low
// ten OR'd into high_surrogate_min, followed by the low ten bits (0x3ff mask)
// OR'd into low_surrogate_min.
| void code_point_to_utf16(Vector<u16>& string, u32 code_point) | ||||
| { | ||||
|     VERIFY(is_unicode(code_point)); | ||||
| 
 | ||||
|     if (code_point < first_supplementary_plane_code_point) { | ||||
|         string.append(static_cast<u16>(code_point)); | ||||
|     } else { | ||||
|         code_point -= first_supplementary_plane_code_point; | ||||
|         string.append(static_cast<u16>(high_surrogate_min | (code_point >> 10))); | ||||
|         string.append(static_cast<u16>(low_surrogate_min | (code_point & 0x3ff))); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| bool Utf16View::is_high_surrogate(u16 code_unit) | ||||
| { | ||||
|     return (code_unit >= high_surrogate_min) && (code_unit <= high_surrogate_max); | ||||
|  |  | |||
|  | @ -18,6 +18,7 @@ namespace AK { | |||
| Vector<u16> utf8_to_utf16(StringView const&); | ||||
| Vector<u16> utf8_to_utf16(Utf8View const&); | ||||
| Vector<u16> utf32_to_utf16(Utf32View const&); | ||||
| void code_point_to_utf16(Vector<u16>&, u32); | ||||
| 
 | ||||
| class Utf16View; | ||||
| 
 | ||||
|  |  | |||
|  | @ -5,6 +5,7 @@ | |||
|  */ | ||||
| 
 | ||||
| #include <AK/StringBuilder.h> | ||||
| #include <AK/Utf16View.h> | ||||
| #include <AK/Utf32View.h> | ||||
| #include <LibJS/Runtime/AbstractOperations.h> | ||||
| #include <LibJS/Runtime/Array.h> | ||||
|  | @ -124,22 +125,25 @@ JS_DEFINE_NATIVE_FUNCTION(StringConstructor::raw) | |||
| // 22.1.2.1 String.fromCharCode ( ...codeUnits ), https://tc39.es/ecma262/#sec-string.fromcharcode
 | ||||
| JS_DEFINE_NATIVE_FUNCTION(StringConstructor::from_char_code) | ||||
| { | ||||
|     StringBuilder builder; | ||||
|     Vector<u16> string; | ||||
|     string.ensure_capacity(vm.argument_count()); | ||||
| 
 | ||||
|     for (size_t i = 0; i < vm.argument_count(); ++i) { | ||||
|         auto char_code = vm.argument(i).to_i32(global_object); | ||||
|         auto code_unit = vm.argument(i).to_u16(global_object); | ||||
|         if (vm.exception()) | ||||
|             return {}; | ||||
|         auto truncated = char_code & 0xffff; | ||||
|         // FIXME: We need an Utf16View :^)
 | ||||
|         builder.append(Utf32View((u32*)&truncated, 1)); | ||||
|         string.append(code_unit); | ||||
|     } | ||||
|     return js_string(vm, builder.build()); | ||||
| 
 | ||||
|     return js_string(vm, move(string)); | ||||
| } | ||||
| 
 | ||||
| // 22.1.2.2 String.fromCodePoint ( ...codePoints ), https://tc39.es/ecma262/#sec-string.fromcodepoint
 | ||||
| JS_DEFINE_NATIVE_FUNCTION(StringConstructor::from_code_point) | ||||
| { | ||||
|     StringBuilder builder; | ||||
|     Vector<u16> string; | ||||
|     string.ensure_capacity(vm.argument_count()); // This will be an under-estimate if any code point is > 0xffff.
 | ||||
| 
 | ||||
|     for (size_t i = 0; i < vm.argument_count(); ++i) { | ||||
|         auto next_code_point = vm.argument(i).to_number(global_object); | ||||
|         if (vm.exception()) | ||||
|  | @ -153,9 +157,11 @@ JS_DEFINE_NATIVE_FUNCTION(StringConstructor::from_code_point) | |||
|             vm.throw_exception<RangeError>(global_object, ErrorType::InvalidCodePoint, next_code_point.to_string_without_side_effects()); | ||||
|             return {}; | ||||
|         } | ||||
|         builder.append_code_point(code_point); | ||||
| 
 | ||||
|         AK::code_point_to_utf16(string, static_cast<u32>(code_point)); | ||||
|     } | ||||
|     return js_string(vm, builder.build()); | ||||
| 
 | ||||
|     return js_string(vm, move(string)); | ||||
| } | ||||
| 
 | ||||
| } | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Timothy Flynn
						Timothy Flynn