diff --git a/Userland/Libraries/LibJS/Runtime/RegExpPrototype.cpp b/Userland/Libraries/LibJS/Runtime/RegExpPrototype.cpp index 8b90087827..5373735c0d 100644 --- a/Userland/Libraries/LibJS/Runtime/RegExpPrototype.cpp +++ b/Userland/Libraries/LibJS/Runtime/RegExpPrototype.cpp @@ -112,7 +112,7 @@ size_t advance_string_index(String const& string, size_t index, bool unicode) return advance_string_index(utf16_string_view, index, unicode); } -static void increment_last_index(GlobalObject& global_object, Object& regexp_object, String const& string, bool unicode) +static void increment_last_index(GlobalObject& global_object, Object& regexp_object, Utf16View const& string, bool unicode) { auto& vm = global_object.vm(); @@ -127,6 +127,14 @@ static void increment_last_index(GlobalObject& global_object, Object& regexp_obj regexp_object.set(vm.names.lastIndex, Value(last_index), Object::ShouldThrowExceptions::Yes); } +static void increment_last_index(GlobalObject& global_object, Object& regexp_object, String const& string, bool unicode) +{ + auto utf16_string = AK::utf8_to_utf16(string); + Utf16View utf16_string_view { utf16_string }; + + return increment_last_index(global_object, regexp_object, utf16_string_view, unicode); +} + // 1.1.2.1 Match Records, https://tc39.es/proposal-regexp-match-indices/#sec-match-records struct Match { static Match create(regex::Match const& match) @@ -485,9 +493,11 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_match) auto* regexp_object = this_object_from(vm, global_object); if (!regexp_object) return {}; - auto s = vm.argument(0).to_string(global_object); + + auto string = vm.argument(0).to_utf16_string(global_object); if (vm.exception()) return {}; + Utf16View string_view { string }; auto global_value = regexp_object->get(vm.names.global); if (vm.exception()) @@ -495,7 +505,7 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_match) bool global = global_value.to_boolean(); if (!global) { - auto result = regexp_exec(global_object, 
*regexp_object, s); + auto result = regexp_exec(global_object, *regexp_object, string_view); if (vm.exception()) return {}; return result; @@ -517,7 +527,7 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_match) size_t n = 0; while (true) { - auto result = regexp_exec(global_object, *regexp_object, s); + auto result = regexp_exec(global_object, *regexp_object, string_view); if (vm.exception()) return {}; @@ -542,7 +552,7 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_match) return {}; if (match_str.is_empty()) { - increment_last_index(global_object, *regexp_object, s, unicode); + increment_last_index(global_object, *regexp_object, string_view, unicode); if (vm.exception()) return {}; } @@ -558,7 +568,7 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_match_all) if (!regexp_object) return {}; - auto string = vm.argument(0).to_string(global_object); + auto string = vm.argument(0).to_utf16_string(global_object); if (vm.exception()) return {}; diff --git a/Userland/Libraries/LibJS/Runtime/RegExpStringIterator.cpp b/Userland/Libraries/LibJS/Runtime/RegExpStringIterator.cpp index ed5a0bcdd2..2a6508811e 100644 --- a/Userland/Libraries/LibJS/Runtime/RegExpStringIterator.cpp +++ b/Userland/Libraries/LibJS/Runtime/RegExpStringIterator.cpp @@ -10,12 +10,12 @@ namespace JS { // 22.2.7.1 CreateRegExpStringIterator ( R, S, global, fullUnicode ), https://tc39.es/ecma262/#sec-createregexpstringiterator -RegExpStringIterator* RegExpStringIterator::create(GlobalObject& global_object, Object& regexp_object, String string, bool global, bool unicode) +RegExpStringIterator* RegExpStringIterator::create(GlobalObject& global_object, Object& regexp_object, Vector<u16> string, bool global, bool unicode) { return global_object.heap().allocate<RegExpStringIterator>(global_object, *global_object.regexp_string_iterator_prototype(), regexp_object, move(string), global, unicode); } -RegExpStringIterator::RegExpStringIterator(Object& prototype, Object& regexp_object, String string, bool global, bool unicode) 
+RegExpStringIterator::RegExpStringIterator(Object& prototype, Object& regexp_object, Vector<u16> string, bool global, bool unicode) : Object(prototype) , m_regexp_object(regexp_object) , m_string(move(string)) diff --git a/Userland/Libraries/LibJS/Runtime/RegExpStringIterator.h b/Userland/Libraries/LibJS/Runtime/RegExpStringIterator.h index fec3bd44e3..849d3a0669 100644 --- a/Userland/Libraries/LibJS/Runtime/RegExpStringIterator.h +++ b/Userland/Libraries/LibJS/Runtime/RegExpStringIterator.h @@ -6,6 +6,7 @@ #pragma once +#include <AK/Utf16View.h> #include <LibJS/Runtime/Object.h> namespace JS { @@ -14,13 +15,13 @@ class RegExpStringIterator final : public Object { JS_OBJECT(RegExpStringIterator, Object); public: - static RegExpStringIterator* create(GlobalObject&, Object& regexp_object, String string, bool global, bool unicode); + static RegExpStringIterator* create(GlobalObject&, Object& regexp_object, Vector<u16> string, bool global, bool unicode); - explicit RegExpStringIterator(Object& prototype, Object& regexp_object, String string, bool global, bool unicode); + explicit RegExpStringIterator(Object& prototype, Object& regexp_object, Vector<u16> string, bool global, bool unicode); virtual ~RegExpStringIterator() override = default; Object& regexp_object() { return m_regexp_object; } - String const& string() const { return m_string; } + Utf16View string() const { return Utf16View { m_string }; } bool global() const { return m_global; } bool unicode() const { return m_unicode; } @@ -31,7 +32,7 @@ private: virtual void visit_edges(Cell::Visitor&) override; Object& m_regexp_object; - String m_string; + Vector<u16> m_string; bool m_global { false }; bool m_unicode { false }; bool m_done { false }; diff --git a/Userland/Libraries/LibJS/Runtime/StringPrototype.cpp b/Userland/Libraries/LibJS/Runtime/StringPrototype.cpp index b6e7a934b4..549d82e508 100644 --- a/Userland/Libraries/LibJS/Runtime/StringPrototype.cpp +++ b/Userland/Libraries/LibJS/Runtime/StringPrototype.cpp @@ -839,13 +839,16 @@ 
JS_DEFINE_NATIVE_FUNCTION(StringPrototype::match) if (vm.exception()) return {}; } - auto s = this_object.to_string(global_object); + + auto string = this_object.to_utf16_string(global_object); if (vm.exception()) return {}; + Utf16View utf16_string_view { string }; + auto rx = regexp_create(global_object, regexp, js_undefined()); if (!rx) return {}; - return rx->invoke(*vm.well_known_symbol_match(), js_string(vm, s)); + return rx->invoke(*vm.well_known_symbol_match(), js_string(vm, utf16_string_view)); } // 22.1.3.12 String.prototype.matchAll ( regexp ), https://tc39.es/ecma262/#sec-string.prototype.matchall @@ -879,13 +882,16 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::match_all) if (vm.exception()) return {}; } - auto s = this_object.to_string(global_object); + + auto string = this_object.to_utf16_string(global_object); if (vm.exception()) return {}; + Utf16View utf16_string_view { string }; + auto rx = regexp_create(global_object, regexp, js_string(vm, "g")); if (!rx) return {}; - return rx->invoke(*vm.well_known_symbol_match_all(), js_string(vm, s)); + return rx->invoke(*vm.well_known_symbol_match_all(), js_string(vm, utf16_string_view)); } // 22.1.3.17 String.prototype.replace ( searchValue, replaceValue ), https://tc39.es/ecma262/#sec-string.prototype.replace diff --git a/Userland/Libraries/LibJS/Tests/builtins/String/String.prototype.match.js b/Userland/Libraries/LibJS/Tests/builtins/String/String.prototype.match.js index da503927ae..9a04eae26c 100644 --- a/Userland/Libraries/LibJS/Tests/builtins/String/String.prototype.match.js +++ b/Userland/Libraries/LibJS/Tests/builtins/String/String.prototype.match.js @@ -45,3 +45,13 @@ test("override exec with non-function", () => { re.exec = 3; expect("test".match(re)).not.toBeNull(); }); + +test("UTF-16", () => { + expect("😀".match("foo")).toBeNull(); + expect("😀".match("\ud83d")).toEqual(["\ud83d"]); + expect("😀".match("\ude00")).toEqual(["\ude00"]); + expect("😀😀".match("\ud83d")).toEqual(["\ud83d"]); + 
expect("😀😀".match("\ude00")).toEqual(["\ude00"]); + expect("😀😀".match(/\ud83d/g)).toEqual(["\ud83d", "\ud83d"]); + expect("😀😀".match(/\ude00/g)).toEqual(["\ude00", "\ude00"]); +}); diff --git a/Userland/Libraries/LibJS/Tests/builtins/String/String.prototype.matchAll.js b/Userland/Libraries/LibJS/Tests/builtins/String/String.prototype.matchAll.js index dbc6ccf964..a94dbfd94f 100644 --- a/Userland/Libraries/LibJS/Tests/builtins/String/String.prototype.matchAll.js +++ b/Userland/Libraries/LibJS/Tests/builtins/String/String.prototype.matchAll.js @@ -76,3 +76,63 @@ test("basic functionality", () => { expect(next.value).toBeUndefined(); } }); + +test("UTF-16", () => { + { + var iterator = "😀".matchAll("foo"); + + var next = iterator.next(); + expect(next.done).toBeTrue(); + expect(next.value).toBeUndefined(); + + next = iterator.next(); + expect(next.done).toBeTrue(); + expect(next.value).toBeUndefined(); + } + { + var iterator = "😀".matchAll("\ud83d"); + + var next = iterator.next(); + expect(next.done).toBeFalse(); + expect(next.value).toEqual(["\ud83d"]); + expect(next.value.index).toBe(0); + + next = iterator.next(); + expect(next.done).toBeTrue(); + expect(next.value).toBeUndefined(); + } + { + var iterator = "😀😀".matchAll("\ud83d"); + + var next = iterator.next(); + expect(next.done).toBeFalse(); + expect(next.value).toEqual(["\ud83d"]); + expect(next.value.index).toBe(0); + + next = iterator.next(); + expect(next.done).toBeFalse(); + expect(next.value).toEqual(["\ud83d"]); + expect(next.value.index).toBe(2); + + next = iterator.next(); + expect(next.done).toBeTrue(); + expect(next.value).toBeUndefined(); + } + { + var iterator = "😀😀".matchAll("\ude00"); + + var next = iterator.next(); + expect(next.done).toBeFalse(); + expect(next.value).toEqual(["\ude00"]); + expect(next.value.index).toBe(1); + + next = iterator.next(); + expect(next.done).toBeFalse(); + expect(next.value).toEqual(["\ude00"]); + expect(next.value.index).toBe(3); + + next = iterator.next(); + 
expect(next.done).toBeTrue(); + expect(next.value).toBeUndefined(); + } +});