From 0bb46235a713d773e6b6deb314affdb84e31ce01 Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Thu, 1 Dec 2022 10:36:44 -0500 Subject: [PATCH] LibJS: Implement String.prototype.isWellFormed --- .../LibJS/Runtime/CommonPropertyNames.h | 1 + .../LibJS/Runtime/StringPrototype.cpp | 38 +++++++++++++++ .../Libraries/LibJS/Runtime/StringPrototype.h | 1 + .../String/String.prototype.isWellFormed.js | 47 +++++++++++++++++++ 4 files changed, 87 insertions(+) create mode 100644 Userland/Libraries/LibJS/Tests/builtins/String/String.prototype.isWellFormed.js diff --git a/Userland/Libraries/LibJS/Runtime/CommonPropertyNames.h b/Userland/Libraries/LibJS/Runtime/CommonPropertyNames.h index 33fc78f5c4..9064a8522e 100644 --- a/Userland/Libraries/LibJS/Runtime/CommonPropertyNames.h +++ b/Userland/Libraries/LibJS/Runtime/CommonPropertyNames.h @@ -308,6 +308,7 @@ namespace JS { P(isoNanosecond) \ P(isoSecond) \ P(isoYear) \ + P(isWellFormed) \ P(isWordLike) \ P(italics) \ P(join) \ diff --git a/Userland/Libraries/LibJS/Runtime/StringPrototype.cpp b/Userland/Libraries/LibJS/Runtime/StringPrototype.cpp index 3c8ad00ed6..80cfd0d27b 100644 --- a/Userland/Libraries/LibJS/Runtime/StringPrototype.cpp +++ b/Userland/Libraries/LibJS/Runtime/StringPrototype.cpp @@ -81,6 +81,32 @@ static Optional string_index_of(Utf16View const& string, Utf16View const return {}; } +// 7.2.9 Static Semantics: IsStringWellFormedUnicode ( string ) - returns true iff no code point read from string is flagged as an unpaired surrogate +static bool is_string_well_formed_unicode(Utf16View string) +{ + // 1. Let strLen be the length of string. + auto length = string.length_in_code_units(); + + // 2. Let k be 0. + size_t k = 0; + + // 3. Repeat, while k ≠ strLen, + while (k != length) { + // a. Let cp be CodePointAt(string, k). + auto code_point = code_point_at(string, k); + + // b. If cp.[[IsUnpairedSurrogate]] is true, return false. + if (code_point.is_unpaired_surrogate) + return false; + + // c. Set k to k + cp.[[CodeUnitCount]]. + k += code_point.code_unit_count; + } + + // 4. Return true. (Also reached immediately for an empty string, because the loop body never runs when strLen is 0.)
+ return true; +} + // 11.1.4 CodePointAt ( string, position ), https://tc39.es/ecma262/#sec-codepointat CodePoint code_point_at(Utf16View const& string, size_t position) { @@ -124,6 +150,7 @@ void StringPrototype::initialize(Realm& realm) define_native_function(realm, vm.names.endsWith, ends_with, 1, attr); define_native_function(realm, vm.names.includes, includes, 1, attr); define_native_function(realm, vm.names.indexOf, index_of, 1, attr); + define_native_function(realm, vm.names.isWellFormed, is_well_formed, 0, attr); define_native_function(realm, vm.names.lastIndexOf, last_index_of, 1, attr); define_native_function(realm, vm.names.localeCompare, locale_compare, 1, attr); define_native_function(realm, vm.names.match, match, 1, attr); @@ -345,6 +372,17 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::index_of) return index.has_value() ? Value(*index) : Value(-1); } +// 22.1.3.10 String.prototype.isWellFormed ( ), https://tc39.es/proposal-is-usv-string/#sec-string.prototype.iswellformed +JS_DEFINE_NATIVE_FUNCTION(StringPrototype::is_well_formed) +{ + // 1. Let O be ? RequireObjectCoercible(this value). + // 2. Let S be ? ToString(O). NOTE(review): steps 1 and 2 both appear to be carried out by the single utf16_string_from(vm) call below - confirm against that helper. + auto string = TRY(utf16_string_from(vm)); + + // 3. Return IsStringWellFormedUnicode(S). 
+ return is_string_well_formed_unicode(string.view()); +} + // 22.1.3.10 String.prototype.lastIndexOf ( searchString [ , position ] ), https://tc39.es/ecma262/#sec-string.prototype.lastindexof JS_DEFINE_NATIVE_FUNCTION(StringPrototype::last_index_of) { diff --git a/Userland/Libraries/LibJS/Runtime/StringPrototype.h b/Userland/Libraries/LibJS/Runtime/StringPrototype.h index 96d1b2fd61..2d51769e04 100644 --- a/Userland/Libraries/LibJS/Runtime/StringPrototype.h +++ b/Userland/Libraries/LibJS/Runtime/StringPrototype.h @@ -38,6 +38,7 @@ private: JS_DECLARE_NATIVE_FUNCTION(ends_with); JS_DECLARE_NATIVE_FUNCTION(includes); JS_DECLARE_NATIVE_FUNCTION(index_of); + JS_DECLARE_NATIVE_FUNCTION(is_well_formed); JS_DECLARE_NATIVE_FUNCTION(last_index_of); JS_DECLARE_NATIVE_FUNCTION(locale_compare); JS_DECLARE_NATIVE_FUNCTION(match); diff --git a/Userland/Libraries/LibJS/Tests/builtins/String/String.prototype.isWellFormed.js b/Userland/Libraries/LibJS/Tests/builtins/String/String.prototype.isWellFormed.js new file mode 100644 index 0000000000..883271b5ad --- /dev/null +++ b/Userland/Libraries/LibJS/Tests/builtins/String/String.prototype.isWellFormed.js @@ -0,0 +1,47 @@ +describe("errors", () => { + test("called with value that cannot be converted to a string", () => { + expect(() => { + String.prototype.isWellFormed.call(Symbol.hasInstance); + }).toThrowWithMessage(TypeError, "Cannot convert symbol to string"); + }); +}); + +describe("basic functionality", () => { + test("ascii strings", () => { + expect("".isWellFormed()).toBeTrue(); + expect("foo".isWellFormed()).toBeTrue(); + expect("abcdefghi".isWellFormed()).toBeTrue(); + }); + + test("valid UTF-16 strings", () => { + expect("😀".isWellFormed()).toBeTrue(); + expect("\ud83d\ude00".isWellFormed()).toBeTrue(); + }); + + test("invalid UTF-16 strings", () => { + expect("😀".slice(0, 1).isWellFormed()).toBeFalse(); + expect("😀".slice(1, 2).isWellFormed()).toBeFalse(); + expect("\ud83d".isWellFormed()).toBeFalse(); + 
expect("\ude00".isWellFormed()).toBeFalse(); + expect("a\ud83d".isWellFormed()).toBeFalse(); + expect("a\ude00".isWellFormed()).toBeFalse(); + expect("\ud83da".isWellFormed()).toBeFalse(); + expect("\ude00a".isWellFormed()).toBeFalse(); + expect("a\ud83da".isWellFormed()).toBeFalse(); + expect("a\ude00a".isWellFormed()).toBeFalse(); + }); + + test("object converted to a string", () => { + let toStringCalled = false; + + const obj = { + toString: function () { + toStringCalled = true; + return "toString"; + }, + }; + + expect(String.prototype.isWellFormed.call(obj)).toBeTrue(); + expect(toStringCalled).toBeTrue(); + });