1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-26 08:47:34 +00:00

LibJS: Correctly handle surrogates in escape()

Fixes test/annexB/built-ins/escape/escape-above{,-astral}.js in
test262. All tests in test/annexB/built-ins/escape pass now.
This commit is contained in:
Nico Weber 2022-01-13 19:45:11 -05:00 committed by Linus Groh
parent 95b8c1745a
commit 23cde7685c
2 changed files with 4 additions and 2 deletions

View file

@ -9,6 +9,7 @@
#include <AK/CharacterTypes.h>
#include <AK/Hex.h>
#include <AK/Platform.h>
#include <AK/Utf16View.h>
#include <AK/Utf8View.h>
#include <LibJS/Console.h>
#include <LibJS/Heap/DeferGC.h>
@ -548,7 +549,7 @@ JS_DEFINE_NATIVE_FUNCTION(GlobalObject::escape)
{
auto string = TRY(vm.argument(0).to_string(global_object));
StringBuilder escaped;
for (auto code_point : Utf8View(string)) {
for (auto code_point : utf8_to_utf16(string)) {
if (code_point < 256) {
if ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789@*_+-./"sv.contains(code_point))
escaped.append(code_point);
@ -556,7 +557,7 @@ JS_DEFINE_NATIVE_FUNCTION(GlobalObject::escape)
escaped.appendff("%{:02X}", code_point);
continue;
}
escaped.appendff("%u{:04X}", code_point); // FIXME: Handle utf-16 surrogate pairs
escaped.appendff("%u{:04X}", code_point);
}
return js_string(vm, escaped.build());
}

View file

@ -4,6 +4,7 @@ test("escape", () => {
["äöü", "%E4%F6%FC"],
["ć", "%u0107"],
["@*_+-./", "@*_+-./"],
["\ud834\udf06", "%uD834%uDF06"],
].forEach(test => {
expect(escape(test[0])).toBe(test[1]);
});