From 77a601d52ecf62043bc89b81e7bcb4414730dc56 Mon Sep 17 00:00:00 2001
From: Timothy Flynn
Date: Thu, 1 Apr 2021 16:33:51 -0400
Subject: [PATCH] LibJS: Implement most of String.prototype.replace

---
 .../LibJS/Runtime/RegExpPrototype.cpp         | 163 ++++++++++++++++++
 .../Libraries/LibJS/Runtime/RegExpPrototype.h |   1 +
 .../LibJS/Runtime/StringPrototype.cpp         |  64 +++++++
 .../Libraries/LibJS/Runtime/StringPrototype.h |   1 +
 .../String/String.prototype.replace.js        | 111 ++++++++++++
 5 files changed, 340 insertions(+)
 create mode 100644 Userland/Libraries/LibJS/Tests/builtins/String/String.prototype.replace.js

diff --git a/Userland/Libraries/LibJS/Runtime/RegExpPrototype.cpp b/Userland/Libraries/LibJS/Runtime/RegExpPrototype.cpp
index 473af54fc3..d1c1886dce 100644
--- a/Userland/Libraries/LibJS/Runtime/RegExpPrototype.cpp
+++ b/Userland/Libraries/LibJS/Runtime/RegExpPrototype.cpp
@@ -50,6 +50,7 @@ void RegExpPrototype::initialize(GlobalObject& global_object)
     define_native_function(vm.names.exec, exec, 1, attr);
 
     define_native_function(vm.well_known_symbol_match(), symbol_match, 1, attr);
+    define_native_function(vm.well_known_symbol_replace(), symbol_replace, 2, attr);
 
     u8 readable_attr = Attribute::Configurable;
     define_native_property(vm.names.flags, flags, {}, readable_attr);
@@ -281,4 +282,166 @@ JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_match)
     return vm.call(*exec, rx, js_string(vm, s));
 }
 
+// RegExp.prototype[@@replace](string, replaceValue)
+// Calls this object's "exec" method on the stringified first argument (repeatedly
+// when its "global" property is truthy, otherwise once) and returns a new string in
+// which every collected match is replaced. A callable replaceValue is invoked per
+// match and its stringified result is used; any other value is stringified as-is.
+// Returns {} as soon as a step fails or leaves an exception pending on the VM.
+JS_DEFINE_NATIVE_FUNCTION(RegExpPrototype::symbol_replace)
+{
+    auto string_value = vm.argument(0);
+    auto replace_value = vm.argument(1);
+
+    // https://tc39.es/ecma262/#sec-regexp.prototype-@@replace
+    auto rx = regexp_object_from(vm, global_object);
+    if (!rx)
+        return {};
+    auto string = string_value.to_string(global_object);
+    if (vm.exception())
+        return {};
+
+    auto global_value = rx->get(vm.names.global).value_or(js_undefined());
+    if (vm.exception())
+        return {};
+
+    bool global = global_value.to_boolean();
+    if (global)
+        rx->regex().start_offset = 0;
+
+    // FIXME: Implement and use RegExpExec - https://tc39.es/ecma262/#sec-regexpexec
+    auto* exec = get_method(global_object, rx, vm.names.exec);
+    if (!exec)
+        return {};
+
+    Vector<Object*> results;
+
+    while (true) {
+        // Hand exec() the already-converted string so the argument is stringified only once.
+        auto result = vm.call(*exec, rx, js_string(vm, string));
+        if (vm.exception())
+            return {};
+        if (result.is_null())
+            break;
+
+        auto* result_object = result.to_object(global_object);
+        if (!result_object)
+            return {};
+
+        results.append(result_object);
+        if (!global)
+            break;
+
+        auto match_object = result_object->get(0);
+        if (vm.exception())
+            return {};
+
+        String match_str = match_object.to_string(global_object);
+        if (vm.exception())
+            return {};
+        if (match_str.is_empty()) {
+            // FIXME: Implement AdvanceStringIndex to take Unicode code points into account - https://tc39.es/ecma262/#sec-advancestringindex
+            //        Once implemented, step (8a) of the @@replace algorithm must also be implemented.
+            rx->regex().start_offset += 1;
+        }
+    }
+
+    String accumulated_result;
+    size_t next_source_position = 0;
+
+    for (auto* result : results) {
+        size_t result_length = length_of_array_like(global_object, *result);
+        size_t n_captures = result_length == 0 ? 0 : result_length - 1;
+
+        auto matched_value = result->get(0).value_or(js_undefined());
+        if (vm.exception())
+            return {};
+
+        // exec() is looked up dynamically and may store a non-string at index 0, so convert instead of assuming.
+        auto matched = matched_value.to_string(global_object);
+        if (vm.exception())
+            return {};
+
+        auto position_value = result->get(vm.names.index).value_or(js_undefined());
+        if (vm.exception())
+            return {};
+
+        double position = position_value.to_integer_or_infinity(global_object);
+        if (vm.exception())
+            return {};
+
+        // Keep the match position inside the bounds of the source string.
+        position = clamp(position, static_cast<double>(0), static_cast<double>(string.length()));
+
+        Vector<Value> captures;
+        for (size_t n = 1; n <= n_captures; ++n) {
+            auto capture = result->get(n).value_or(js_undefined());
+            if (vm.exception())
+                return {};
+
+            if (!capture.is_undefined()) {
+                auto capture_string = capture.to_string(global_object);
+                if (vm.exception())
+                    return {};
+
+                capture = Value(js_string(vm, capture_string));
+                if (vm.exception())
+                    return {};
+            }
+
+            captures.append(move(capture));
+        }
+
+        auto named_captures = result->get(vm.names.groups).value_or(js_undefined());
+        if (vm.exception())
+            return {};
+
+        String replacement;
+
+        if (replace_value.is_function()) {
+            // Callback arguments: matched text, each capture, match position, source string[, groups].
+            Vector<Value> replacer_args { Value(js_string(vm, matched)) };
+            replacer_args.append(move(captures));
+            replacer_args.append(Value(position));
+            replacer_args.append(js_string(vm, string));
+            if (!named_captures.is_undefined()) {
+                replacer_args.append(move(named_captures));
+            }
+
+            auto replace_result = vm.call(replace_value.as_function(), js_undefined(), move(replacer_args));
+            if (vm.exception())
+                return {};
+
+            replacement = replace_result.to_string(global_object);
+            if (vm.exception())
+                return {};
+        } else {
+            // FIXME: Implement the GetSubstitution algorithm for substituting placeholder '$' characters - https://tc39.es/ecma262/#sec-getsubstitution
+            replacement = replace_value.to_string(global_object);
+            if (vm.exception())
+                return {};
+        }
+
+        // A match that starts before the end of the previous replacement is skipped.
+        if (position >= next_source_position) {
+            StringBuilder builder;
+            builder.append(accumulated_result);
+            builder.append(string.substring(next_source_position, position - next_source_position));
+            builder.append(replacement);
+
+            accumulated_result = builder.build();
+            next_source_position = position + matched.length();
+        }
+    }
+
+    if (next_source_position >= string.length())
+        return js_string(vm, accumulated_result);
+
+    StringBuilder builder;
+    builder.append(accumulated_result);
+    builder.append(string.substring(next_source_position));
+
+    return js_string(vm, builder.build());
+}
+
 }
diff --git a/Userland/Libraries/LibJS/Runtime/RegExpPrototype.h b/Userland/Libraries/LibJS/Runtime/RegExpPrototype.h
index 6ab54a7ca6..d50fd45da0 100644
--- a/Userland/Libraries/LibJS/Runtime/RegExpPrototype.h
+++ b/Userland/Libraries/LibJS/Runtime/RegExpPrototype.h
@@ -48,6 +48,7 @@ private:
     JS_DECLARE_NATIVE_FUNCTION(test);
     JS_DECLARE_NATIVE_FUNCTION(to_string);
     JS_DECLARE_NATIVE_FUNCTION(symbol_match);
+    JS_DECLARE_NATIVE_FUNCTION(symbol_replace);
 
 #define __JS_ENUMERATE(_, flag_name, ...) \
     JS_DECLARE_NATIVE_GETTER(flag_name);
diff --git a/Userland/Libraries/LibJS/Runtime/StringPrototype.cpp b/Userland/Libraries/LibJS/Runtime/StringPrototype.cpp
index e7cf3428fa..b29a4b923d 100644
--- a/Userland/Libraries/LibJS/Runtime/StringPrototype.cpp
+++ b/Userland/Libraries/LibJS/Runtime/StringPrototype.cpp
@@ -108,6 +108,7 @@ void StringPrototype::initialize(GlobalObject& global_object)
     define_native_function(vm.names.lastIndexOf, last_index_of, 1, attr);
     define_native_function(vm.names.at, at, 1, attr);
     define_native_function(vm.names.match, match, 1, attr);
+    define_native_function(vm.names.replace, replace, 2, attr);
     define_native_function(vm.well_known_symbol_iterator(), symbol_iterator, 0, attr);
 }
 
@@ -675,4 +676,67 @@ JS_DEFINE_NATIVE_FUNCTION(StringPrototype::match)
     return rx->invoke(vm.well_known_symbol_match(), js_string(vm, s));
 }
 
+// String.prototype.replace(searchValue, replaceValue)
+// When searchValue is not nullish and exposes a @@replace method, the work is
+// delegated to that method. Otherwise the first occurrence of the stringified
+// searchValue in the stringified this value is replaced by replaceValue: the
+// stringified result of calling it when callable, else its string conversion.
+// Throws a TypeError when the this value is null or undefined.
+JS_DEFINE_NATIVE_FUNCTION(StringPrototype::replace)
+{
+    // https://tc39.es/ecma262/#sec-string.prototype.replace
+    auto this_object = vm.this_value(global_object);
+    if (this_object.is_nullish()) {
+        vm.throw_exception<TypeError>(global_object, ErrorType::ToObjectNullOrUndefined);
+        return {};
+    }
+
+    auto search_value = vm.argument(0);
+    auto replace_value = vm.argument(1);
+
+    if (!search_value.is_nullish()) {
+        if (auto* replacer = get_method(global_object, search_value, vm.well_known_symbol_replace()))
+            return vm.call(*replacer, search_value, this_object, replace_value);
+        // If looking up @@replace left an exception pending, stop here instead of carrying on.
+        if (vm.exception())
+            return {};
+    }
+
+    auto string = this_object.to_string(global_object);
+    if (vm.exception())
+        return {};
+    auto search_string = search_value.to_string(global_object);
+    if (vm.exception())
+        return {};
+    Optional<size_t> position = string.index_of(search_string);
+    if (!position.has_value())
+        return js_string(vm, string);
+
+    auto preserved = string.substring(0, position.value());
+    String replacement;
+
+    if (replace_value.is_function()) {
+        // The callback receives the search *string*, not the raw searchValue, so replace(2, fn) passes "2".
+        auto result = vm.call(replace_value.as_function(), js_undefined(), js_string(vm, search_string), Value(position.value()), js_string(vm, string));
+        if (vm.exception())
+            return {};
+
+        replacement = result.to_string(global_object);
+        if (vm.exception())
+            return {};
+    } else {
+        // FIXME: Implement the GetSubstitution algorithm for substituting placeholder '$' characters - https://tc39.es/ecma262/#sec-getsubstitution
+        replacement = replace_value.to_string(global_object);
+        if (vm.exception())
+            return {};
+    }
+
+    StringBuilder builder;
+    builder.append(preserved);
+    builder.append(replacement);
+    builder.append(string.substring(position.value() + search_string.length()));
+
+    return js_string(vm, builder.build());
+}
+
 }
diff --git a/Userland/Libraries/LibJS/Runtime/StringPrototype.h b/Userland/Libraries/LibJS/Runtime/StringPrototype.h
index d764b8fd6b..41110ecc1b 100644
--- a/Userland/Libraries/LibJS/Runtime/StringPrototype.h
+++ b/Userland/Libraries/LibJS/Runtime/StringPrototype.h
@@ -64,6 +64,7 @@ private:
     JS_DECLARE_NATIVE_FUNCTION(last_index_of);
     JS_DECLARE_NATIVE_FUNCTION(at);
     JS_DECLARE_NATIVE_FUNCTION(match);
+    JS_DECLARE_NATIVE_FUNCTION(replace);
 
     JS_DECLARE_NATIVE_FUNCTION(symbol_iterator);
 };
diff --git a/Userland/Libraries/LibJS/Tests/builtins/String/String.prototype.replace.js b/Userland/Libraries/LibJS/Tests/builtins/String/String.prototype.replace.js
new file mode 100644
index 0000000000..5cce84c653
--- /dev/null
+++ b/Userland/Libraries/LibJS/Tests/builtins/String/String.prototype.replace.js
@@ -0,0 +1,111 @@
+test("invariants", () => {
+    expect(String.prototype.replace).toHaveLength(2);
+});
+
+test("error cases", () => {
+    [null, undefined].forEach(value => {
+        expect(() => {
+            value.replace("", "");
+        }).toThrow(TypeError);
+    });
+});
+
+test("basic string replacement", () => {
+    expect("".replace("", "")).toBe("");
+    expect("".replace("a", "")).toBe("");
+    expect("".replace("", "a")).toBe("a");
+
+    expect("a".replace("a", "")).toBe("");
+    expect("a".replace("a", "b")).toBe("b");
+    expect("aa".replace("a", "b")).toBe("ba");
+    expect("ca".replace("a", "b")).toBe("cb");
+});
+
+test("convertible string replacement", () => {
+    expect("123".replace(2, "x")).toBe("1x3");
+    expect("123".replace("2", 4)).toBe("143");
+    expect("123".replace(2, 4)).toBe("143");
+});
+
+test("functional string replacement", () => {
+    expect(
+        "a".replace("a", function () {
+            return "b";
+        })
+    ).toBe("b");
+    expect("a".replace("a", () => "b")).toBe("b");
+
+    expect(
+        "abc".replace("b", (search, position, string) => {
+            expect(search).toBe("b");
+            expect(position).toBe(1);
+            expect(string).toBe("abc");
+            return "x";
+        })
+    ).toBe("axc");
+
+    expect(
+        "123".replace(2, search => {
+            expect(search).toBe("2");
+            return "x";
+        })
+    ).toBe("1x3");
+});
+
+test("basic regex replacement", () => {
+    expect("".replace(/a/, "")).toBe("");
+    expect("a".replace(/a/, "")).toBe("");
+
+    expect("abc123def".replace(/\D/, "*")).toBe("*bc123def");
+    expect("123abc456".replace(/\D/, "*")).toBe("123*bc456");
+    expect("abc123def".replace(/\D/g, "*")).toBe("***123***");
+    expect("123abc456".replace(/\D/g, "*")).toBe("123***456");
+});
+
+test("functional regex replacement", () => {
+    expect(
+        "a".replace(/a/, function () {
+            return "b";
+        })
+    ).toBe("b");
+    expect("a".replace(/a/, () => "b")).toBe("b");
+
+    expect(
+        "abc".replace(/\D/, (matched, position, string) => {
+            expect(matched).toBe("a");
+            expect(position).toBe(0);
+            expect(string).toBe("abc");
+            return "x";
+        })
+    ).toBe("xbc");
+
+    expect(
+        "abc".replace(/\D/g, (matched, position, string) => {
+            expect(matched).toBe(string[position]);
+            expect(position <= 2).toBeTrue();
+            expect(string).toBe("abc");
+            return "x";
+        })
+    ).toBe("xxx");
+
+    expect(
+        "abc".replace(/(\D)/g, (matched, capture1, position, string) => {
+            expect(matched).toBe(string[position]);
+            expect(capture1).toBe(string[position]);
+            expect(position <= 2).toBeTrue();
+            expect(string).toBe("abc");
+            return "x";
+        })
+    ).toBe("xxx");
+
+    expect(
+        "abcd".replace(/(\D)b(\D)/g, (matched, capture1, capture2, position, string) => {
+            expect(matched).toBe("abc");
+            expect(capture1).toBe("a");
+            expect(capture2).toBe("c");
+            expect(position).toBe(0);
+            expect(string).toBe("abcd");
+            return "x";
+        })
+    ).toBe("xd");
+});