diff --git a/Libraries/LibRegex/RegexByteCode.cpp b/Libraries/LibRegex/RegexByteCode.cpp index 42aab9120f..a68ee2d728 100644 --- a/Libraries/LibRegex/RegexByteCode.cpp +++ b/Libraries/LibRegex/RegexByteCode.cpp @@ -275,7 +275,7 @@ ALWAYS_INLINE ExecutionResult OpCode_SaveRightNamedCaptureGroup::execute(const M auto& map = output.named_capture_group_matches.at(input.match_index); #ifdef REGEX_DEBUG - ASSERT(start_position + length < input.view.length()); + ASSERT(start_position + length <= input.view.length()); dbg() << "Save named capture group with name=" << capture_group_name << " and content: " << input.view.substring_view(start_position, length).to_string(); #endif @@ -415,7 +415,7 @@ ALWAYS_INLINE bool OpCode_Compare::compare_string(const MatchInput& input, Match return false; } -ALWAYS_INLINE void OpCode_Compare::compare_character_class(const MatchInput& input, MatchState& state, CharClass character_class, char ch, bool inverse, bool& inverse_matched) +ALWAYS_INLINE void OpCode_Compare::compare_character_class(const MatchInput& input, MatchState& state, CharClass character_class, u32 ch, bool inverse, bool& inverse_matched) { switch (character_class) { case CharClass::Alnum: @@ -513,7 +513,7 @@ ALWAYS_INLINE void OpCode_Compare::compare_character_class(const MatchInput& inp } } -ALWAYS_INLINE void OpCode_Compare::compare_character_range(const MatchInput& input, MatchState& state, char from, char to, char ch, bool inverse, bool& inverse_matched) +ALWAYS_INLINE void OpCode_Compare::compare_character_range(const MatchInput& input, MatchState& state, u32 from, u32 to, u32 ch, bool inverse, bool& inverse_matched) { if (input.regex_options & AllFlags::Insensitive) { from = tolower(from); @@ -553,7 +553,7 @@ const Vector OpCode_Compare::variable_arguments_to_string(Optional(m_bytecode->at(offset++)); auto& length = m_bytecode->at(offset++); - result.empend(String::format("value=\"%s\"", String { str, length }.characters())); + result.empend(String::format("value=\"%.*s\"", length, str)); if (!view.is_null()) result.empend(String::format("compare against: \"%s\"", input.value().view.substring_view(state().string_position, state().string_position + length > view.length() ? 0 : length).to_string().characters())); } else if (compare_type == CharacterCompareType::CharClass) { diff --git a/Libraries/LibRegex/RegexByteCode.h b/Libraries/LibRegex/RegexByteCode.h index d468b74b37..7cac259bf5 100644 --- a/Libraries/LibRegex/RegexByteCode.h +++ b/Libraries/LibRegex/RegexByteCode.h @@ -39,7 +39,7 @@ namespace regex { -using ByteCodeValueType = size_t; +using ByteCodeValueType = u64; #define ENUMERATE_OPCODES \ __ENUMERATE_OPCODE(Compare) \ @@ -102,22 +102,22 @@ enum class CharClass : ByteCodeValueType { }; struct CharRange { - const char from; - const char to; + const u32 from; + const u32 to; - CharRange(size_t value) - : from(value >> 8) - , to(value & 0xFF) + CharRange(u64 value) + : from(value >> 32) + , to(value & 0xffffffff) { } - CharRange(char from, char to) + CharRange(u32 from, u32 to) : from(from) , to(to) { } - operator ByteCodeValueType() const { return (from << 8) | to; } + operator ByteCodeValueType() const { return ((u64)from << 32) | to; } }; struct CompareTypeAndValuePair { @@ -568,8 +568,8 @@ public: private: ALWAYS_INLINE static void compare_char(const MatchInput& input, MatchState& state, u32 ch1, bool inverse, bool& inverse_matched); ALWAYS_INLINE static bool compare_string(const MatchInput& input, MatchState& state, const char* str, size_t length); - ALWAYS_INLINE static void compare_character_class(const MatchInput& input, MatchState& state, CharClass character_class, char ch, bool inverse, bool& inverse_matched); - ALWAYS_INLINE static void compare_character_range(const MatchInput& input, MatchState& state, char from, char to, char ch, bool inverse, bool& inverse_matched); + ALWAYS_INLINE static void compare_character_class(const MatchInput& input, MatchState& state, CharClass character_class, u32 ch, bool inverse, bool& inverse_matched); + ALWAYS_INLINE static void compare_character_range(const MatchInput& input, MatchState& state, u32 from, u32 to, u32 ch, bool inverse, bool& inverse_matched); }; template diff --git a/Libraries/LibRegex/RegexMatcher.cpp b/Libraries/LibRegex/RegexMatcher.cpp index bfe09b88c5..60bccfd95e 100644 --- a/Libraries/LibRegex/RegexMatcher.cpp +++ b/Libraries/LibRegex/RegexMatcher.cpp @@ -131,7 +131,7 @@ RegexResult Matcher::match(const Vector views, Optional for (auto& view : views) { input.view = view; #ifdef REGEX_DEBUG - dbg() << "[match] Starting match with view (" << view.length() << "): _" << view << "_"; + dbg() << "[match] Starting match with view (" << view.length() << "): _" << view.to_string() << "_"; #endif auto view_length = view.length(); @@ -171,7 +171,7 @@ RegexResult Matcher::match(const Vector views, Optional #ifdef REGEX_DEBUG dbg() << "state.string_position: " << state.string_position << " view_index: " << view_index; - dbg() << "[match] Found a match (length = " << state.string_position - view_index << "): " << input.view.substring_view(view_index, state.string_position - view_index); + dbg() << "[match] Found a match (length = " << state.string_position - view_index << "): " << input.view.substring_view(view_index, state.string_position - view_index).to_string(); #endif ++match_count; diff --git a/Libraries/LibRegex/RegexParser.cpp b/Libraries/LibRegex/RegexParser.cpp index 5d44c117d7..c97427a18c 100644 --- a/Libraries/LibRegex/RegexParser.cpp +++ b/Libraries/LibRegex/RegexParser.cpp @@ -365,7 +365,7 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_bracket_expression(ByteCode& stack values.take_last(); // RangeExpressionDummy auto value1 = values.take_last(); - values.append({ CharacterCompareType::CharRange, static_cast(CharRange { (char)value1.value, (char)value2.value }) }); + values.append({ CharacterCompareType::CharRange, static_cast(CharRange { (u32)value1.value, (u32)value2.value }) }); } } diff --git a/Libraries/LibRegex/Tests/CMakeLists.txt b/Libraries/LibRegex/Tests/CMakeLists.txt index a337f05153..ae1e399e18 100644 --- a/Libraries/LibRegex/Tests/CMakeLists.txt +++ b/Libraries/LibRegex/Tests/CMakeLists.txt @@ -1,10 +1,9 @@ file(GLOB TEST_SOURCES CONFIGURE_DEPENDS "*.cpp") -file(GLOB REGEX_SOURCES CONFIGURE_DEPENDS "../*.cpp") -file(GLOB C_REGEX_SOURCES CONFIGURE_DEPENDS "../C/*.cpp") +file(GLOB REGEX_SOURCES CONFIGURE_DEPENDS "../*.cpp" "../C/*.cpp") foreach(source ${TEST_SOURCES}) get_filename_component(name ${source} NAME_WE) - add_executable(${name} ${source} ${REGEX_SOURCES} ${C_REGEX_SOURCES}) + add_executable(${name} ${source} ${REGEX_SOURCES}) target_link_libraries(${name} LagomCore) add_test( NAME ${name}