From 19bf7734a49c1f86247c6b52c4831cf0575c6180 Mon Sep 17 00:00:00 2001 From: AnotherTest Date: Sun, 6 Dec 2020 17:02:03 +0330 Subject: [PATCH] LibRegex: Store 'String' matches inside the bytecode Also removes an unnecessary 'length' argument (StringView has a length!) --- Libraries/LibRegex/RegexByteCode.cpp | 14 +++++++++----- Libraries/LibRegex/RegexByteCode.h | 16 +++++++++++----- Libraries/LibRegex/RegexParser.cpp | 4 ++-- 3 files changed, 22 insertions(+), 12 deletions(-) diff --git a/Libraries/LibRegex/RegexByteCode.cpp b/Libraries/LibRegex/RegexByteCode.cpp index 3fffc193b7..551327f30e 100644 --- a/Libraries/LibRegex/RegexByteCode.cpp +++ b/Libraries/LibRegex/RegexByteCode.cpp @@ -444,14 +444,16 @@ ALWAYS_INLINE ExecutionResult OpCode_Compare::execute(const MatchInput& input, M } else if (compare_type == CharacterCompareType::String) { ASSERT(!current_inversion_state()); - char* str = reinterpret_cast(m_bytecode->at(offset++)); - auto& length = m_bytecode->at(offset++); + const auto& length = m_bytecode->at(offset++); + StringBuilder str_builder; + for (size_t i = 0; i < length; ++i) + str_builder.append(m_bytecode->at(offset++)); // We want to compare a string that is definitely longer than the available string if (input.view.length() - state.string_position < length) return ExecutionResult::Failed_ExecuteLowPrioForks; - if (!compare_string(input, state, str, length)) + if (!compare_string(input, state, str_builder.string_view().characters_without_null_termination(), length)) return ExecutionResult::Failed_ExecuteLowPrioForks; } else if (compare_type == CharacterCompareType::CharClass) { @@ -717,9 +719,11 @@ const Vector OpCode_Compare::variable_arguments_to_string(Optionalat(offset++); result.empend(String::format("number=%lu", ref)); } else if (compare_type == CharacterCompareType::String) { - char* str = reinterpret_cast(m_bytecode->at(offset++)); auto& length = m_bytecode->at(offset++); - result.empend(String::format("value=\"%.*s\"", length, str)); + StringBuilder str_builder; + for (size_t i = 0; i < length; ++i) + str_builder.append(m_bytecode->at(offset++)); + result.empend(String::format("value=\"%.*s\"", length, str_builder.string_view().characters_without_null_termination())); if (!view.is_null() && view.length() > state().string_position) result.empend(String::format( "compare against: \"%s\"", diff --git a/Libraries/LibRegex/RegexByteCode.h b/Libraries/LibRegex/RegexByteCode.h index da3f971bfe..6701d7c887 100644 --- a/Libraries/LibRegex/RegexByteCode.h +++ b/Libraries/LibRegex/RegexByteCode.h @@ -186,7 +186,7 @@ public: append(move(bytecode)); } - void insert_bytecode_compare_string(StringView view, size_t length) + void insert_bytecode_compare_string(StringView view) { ByteCode bytecode; @@ -196,8 +196,7 @@ public: ByteCode arguments; arguments.empend(static_cast(CharacterCompareType::String)); - arguments.empend(reinterpret_cast(view.characters_without_null_termination())); - arguments.empend(length); + arguments.insert_string(view); bytecode.empend(arguments.size()); // size of arguments bytecode.append(move(arguments)); @@ -205,7 +204,7 @@ public: append(move(bytecode)); } - void insert_bytecode_compare_named_reference(StringView name, size_t length) + void insert_bytecode_compare_named_reference(StringView name) { ByteCode bytecode; @@ -216,7 +215,7 @@ public: arguments.empend(static_cast(CharacterCompareType::NamedReference)); arguments.empend(reinterpret_cast(name.characters_without_null_termination())); - arguments.empend(length); + arguments.empend(name.length()); bytecode.empend(arguments.size()); // size of arguments bytecode.append(move(arguments)); @@ -458,6 +457,13 @@ public: OpCode* get_opcode(MatchState& state) const; private: + void insert_string(const StringView& view) + { + empend((ByteCodeValueType)view.length()); + for (size_t i = 0; i < view.length(); ++i) + empend((ByteCodeValueType)view[i]); + } + ALWAYS_INLINE OpCode* get_opcode_by_id(OpCodeId id) const; static HashMap> s_opcodes; }; diff --git a/Libraries/LibRegex/RegexParser.cpp b/Libraries/LibRegex/RegexParser.cpp index f342cce512..aa22be74b2 100644 --- a/Libraries/LibRegex/RegexParser.cpp +++ b/Libraries/LibRegex/RegexParser.cpp @@ -430,7 +430,7 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_sub_expression(ByteCode& stack, si if (length > 1) { // last character is inserted into 'bytecode' for duplication symbol handling auto new_length = length - ((match_repetition_symbol() && length > 1) ? 1 : 0); - stack.insert_bytecode_compare_string(start_token.value(), new_length); + stack.insert_bytecode_compare_string({ start_token.value().characters_without_null_termination(), new_length }); } if ((match_repetition_symbol() && length > 1) || length == 1) // Create own compare opcode for last character before duplication symbol @@ -1099,7 +1099,7 @@ bool ECMA262Parser::parse_atom_escape(ByteCode& stack, size_t& match_length_mini } match_length_minimum += maybe_length.value(); - stack.insert_bytecode_compare_named_reference(name, name.length()); + stack.insert_bytecode_compare_named_reference(name); return true; }