diff --git a/Userland/Libraries/LibJS/Bytecode/Op.cpp b/Userland/Libraries/LibJS/Bytecode/Op.cpp index 519b27b4f5..0eeaf70be3 100644 --- a/Userland/Libraries/LibJS/Bytecode/Op.cpp +++ b/Userland/Libraries/LibJS/Bytecode/Op.cpp @@ -175,7 +175,7 @@ void NewRegExp::execute_impl(Bytecode::Interpreter& interpreter) const auto source = interpreter.current_executable().get_string(m_source_index); auto flags = interpreter.current_executable().get_string(m_flags_index); - interpreter.accumulator() = RegExpObject::create(interpreter.global_object(), source, flags); + interpreter.accumulator() = regexp_create(interpreter.global_object(), js_string(interpreter.vm(), source), js_string(interpreter.vm(), flags)); } void CopyObjectExcludingProperties::execute_impl(Bytecode::Interpreter& interpreter) const diff --git a/Userland/Libraries/LibJS/Runtime/RegExpObject.cpp b/Userland/Libraries/LibJS/Runtime/RegExpObject.cpp index e49539084d..8955c82b3a 100644 --- a/Userland/Libraries/LibJS/Runtime/RegExpObject.cpp +++ b/Userland/Libraries/LibJS/Runtime/RegExpObject.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include namespace JS { @@ -88,17 +89,18 @@ static Flags options_from(GlobalObject& global_object, const String& flags) return options; } -RegExpObject* RegExpObject::create(GlobalObject& global_object, String pattern, String flags) +RegExpObject* RegExpObject::create(GlobalObject& global_object, String original_pattern, String parsed_pattern, String flags) { - return global_object.heap().allocate(global_object, pattern, flags, *global_object.regexp_prototype()); + return global_object.heap().allocate(global_object, move(original_pattern), move(parsed_pattern), move(flags), *global_object.regexp_prototype()); } -RegExpObject::RegExpObject(String pattern, String flags, Object& prototype) +RegExpObject::RegExpObject(String original_pattern, String parsed_pattern, String flags, Object& prototype) : Object(prototype) - , m_pattern(pattern) - , m_flags(flags) + , m_original_pattern(move(original_pattern)) + , m_parsed_pattern(move(parsed_pattern)) + , m_flags(move(flags)) , m_active_flags(options_from(global_object(), m_flags)) - , m_regex(pattern, m_active_flags.effective_flags) + , m_regex(m_parsed_pattern, m_active_flags.effective_flags) { if (m_regex.parser_result.error != regex::Error::NoError) { vm().throw_exception(global_object(), ErrorType::RegExpCompileError, m_regex.error_string()); @@ -120,14 +122,7 @@ void RegExpObject::initialize(GlobalObject& global_object) RegExpObject* regexp_create(GlobalObject& global_object, Value pattern, Value flags) { auto& vm = global_object.vm(); - String p; - if (pattern.is_undefined()) { - p = String::empty(); - } else { - p = pattern.to_string(global_object); - if (vm.exception()) - return {}; - } + String f; if (flags.is_undefined()) { f = String::empty(); @@ -136,7 +131,46 @@ RegExpObject* regexp_create(GlobalObject& global_object, Value pattern, Value fl if (vm.exception()) return {}; } - auto* object = RegExpObject::create(global_object, move(p), move(f)); + + String original_pattern; + String parsed_pattern; + + if (pattern.is_undefined()) { + original_pattern = String::empty(); + parsed_pattern = String::empty(); + } else { + auto utf16_pattern = pattern.to_utf16_string(global_object); + if (vm.exception()) + return {}; + + Utf16View utf16_pattern_view { utf16_pattern }; + bool unicode = f.find('u').has_value(); + StringBuilder builder; + + // If the Unicode flag is set, append each code point to the pattern. Otherwise, append each + // code unit. But unlike the spec, multi-byte code units must be escaped for LibRegex to parse. + for (size_t i = 0; i < utf16_pattern_view.length_in_code_units();) { + if (unicode) { + auto code_point = code_point_at(utf16_pattern_view, i); + builder.append_code_point(code_point.code_point); + i += code_point.code_unit_count; + continue; + } + + u16 code_unit = utf16_pattern_view.code_unit_at(i); + ++i; + + if (code_unit > 0x7f) + builder.appendff("\\u{:04x}", code_unit); + else + builder.append_code_point(code_unit); + } + + original_pattern = utf16_pattern_view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes); + parsed_pattern = builder.build(); + } + + auto* object = RegExpObject::create(global_object, move(original_pattern), move(parsed_pattern), move(f)); object->set(vm.names.lastIndex, Value(0), Object::ShouldThrowExceptions::Yes); if (vm.exception()) return {}; diff --git a/Userland/Libraries/LibJS/Runtime/RegExpObject.h b/Userland/Libraries/LibJS/Runtime/RegExpObject.h index 4ee57d3764..528a9f1129 100644 --- a/Userland/Libraries/LibJS/Runtime/RegExpObject.h +++ b/Userland/Libraries/LibJS/Runtime/RegExpObject.h @@ -23,20 +23,21 @@ class RegExpObject : public Object { JS_OBJECT(RegExpObject, Object); public: - static RegExpObject* create(GlobalObject&, String pattern, String flags); + static RegExpObject* create(GlobalObject&, String original_pattern, String parsed_pattern, String flags); - RegExpObject(String pattern, String flags, Object& prototype); + RegExpObject(String original_pattern, String parsed_pattern, String flags, Object& prototype); virtual void initialize(GlobalObject&) override; virtual ~RegExpObject() override; - const String& pattern() const { return m_pattern; } + const String& pattern() const { return m_original_pattern; } const String& flags() const { return m_flags; } const regex::RegexOptions& declared_options() { return m_active_flags.declared_flags; } const Regex& regex() { return m_regex; } const Regex& regex() const { return m_regex; } private: - String m_pattern; + String m_original_pattern; + String m_parsed_pattern; String m_flags; Flags m_active_flags; Regex m_regex;