mirror of
https://github.com/RGBCube/serenity
synced 2025-05-31 23:38:12 +00:00
LibJS: Decode UTF-16 surrogate pairs during string literal construction
Rather than deferring this decoding to PrimitiveString, we can decode surrogate pairs when parsing the string. This prevents a string copy when constructing the PrimitiveString.
This commit is contained in:
parent
27d555bab0
commit
4c2cc419f9
2 changed files with 28 additions and 14 deletions
|
@@ -63,17 +63,6 @@ PrimitiveString* js_string(Heap& heap, String string)
|
|||
return &heap.vm().single_ascii_character_string(ch);
|
||||
}
|
||||
|
||||
// UTF-8 strings must first be transcoded to UTF-16, even though they are stored as String objects
|
||||
// internally, to parse encoded surrogate pairs. As an optimization to reduce string copying, only
|
||||
// perform that transcoding if there are non-ASCII codepoints in the string.
|
||||
for (auto it : string) {
|
||||
auto ch = static_cast<u8>(it);
|
||||
if (!is_ascii(ch)) {
|
||||
auto utf16_string = AK::utf8_to_utf16(string);
|
||||
return js_string(heap, Utf16View { utf16_string });
|
||||
}
|
||||
}
|
||||
|
||||
return heap.allocate_without_global_object<PrimitiveString>(move(string));
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue