1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-31 04:58:13 +00:00

LibRegex: Change bytecode value type to a 64-bit value

This allows Unicode ranges to be stored compactly. The wider type is not
utilised at the moment, but changing it later would have been
significantly more difficult.
Also fixes a few debug logs.
This commit is contained in:
AnotherTest 2020-11-19 18:45:36 +03:30 committed by Andreas Kling
parent 92ea9ed4a5
commit 3db8ced4c7
5 changed files with 19 additions and 20 deletions

View file

@ -275,7 +275,7 @@ ALWAYS_INLINE ExecutionResult OpCode_SaveRightNamedCaptureGroup::execute(const M
auto& map = output.named_capture_group_matches.at(input.match_index); auto& map = output.named_capture_group_matches.at(input.match_index);
#ifdef REGEX_DEBUG #ifdef REGEX_DEBUG
ASSERT(start_position + length < input.view.length()); ASSERT(start_position + length <= input.view.length());
dbg() << "Save named capture group with name=" << capture_group_name << " and content: " << input.view.substring_view(start_position, length).to_string(); dbg() << "Save named capture group with name=" << capture_group_name << " and content: " << input.view.substring_view(start_position, length).to_string();
#endif #endif
@ -415,7 +415,7 @@ ALWAYS_INLINE bool OpCode_Compare::compare_string(const MatchInput& input, Match
return false; return false;
} }
ALWAYS_INLINE void OpCode_Compare::compare_character_class(const MatchInput& input, MatchState& state, CharClass character_class, char ch, bool inverse, bool& inverse_matched) ALWAYS_INLINE void OpCode_Compare::compare_character_class(const MatchInput& input, MatchState& state, CharClass character_class, u32 ch, bool inverse, bool& inverse_matched)
{ {
switch (character_class) { switch (character_class) {
case CharClass::Alnum: case CharClass::Alnum:
@ -513,7 +513,7 @@ ALWAYS_INLINE void OpCode_Compare::compare_character_class(const MatchInput& inp
} }
} }
ALWAYS_INLINE void OpCode_Compare::compare_character_range(const MatchInput& input, MatchState& state, char from, char to, char ch, bool inverse, bool& inverse_matched) ALWAYS_INLINE void OpCode_Compare::compare_character_range(const MatchInput& input, MatchState& state, u32 from, u32 to, u32 ch, bool inverse, bool& inverse_matched)
{ {
if (input.regex_options & AllFlags::Insensitive) { if (input.regex_options & AllFlags::Insensitive) {
from = tolower(from); from = tolower(from);
@ -553,7 +553,7 @@ const Vector<String> OpCode_Compare::variable_arguments_to_string(Optional<Match
} else if (compare_type == CharacterCompareType::String) { } else if (compare_type == CharacterCompareType::String) {
char* str = reinterpret_cast<char*>(m_bytecode->at(offset++)); char* str = reinterpret_cast<char*>(m_bytecode->at(offset++));
auto& length = m_bytecode->at(offset++); auto& length = m_bytecode->at(offset++);
result.empend(String::format("value=\"%s\"", String { str, length }.characters())); result.empend(String::format("value=\"%.*s\"", length, str));
if (!view.is_null()) if (!view.is_null())
result.empend(String::format("compare against: \"%s\"", input.value().view.substring_view(state().string_position, state().string_position + length > view.length() ? 0 : length).to_string().characters())); result.empend(String::format("compare against: \"%s\"", input.value().view.substring_view(state().string_position, state().string_position + length > view.length() ? 0 : length).to_string().characters()));
} else if (compare_type == CharacterCompareType::CharClass) { } else if (compare_type == CharacterCompareType::CharClass) {

View file

@ -39,7 +39,7 @@
namespace regex { namespace regex {
using ByteCodeValueType = size_t; using ByteCodeValueType = u64;
#define ENUMERATE_OPCODES \ #define ENUMERATE_OPCODES \
__ENUMERATE_OPCODE(Compare) \ __ENUMERATE_OPCODE(Compare) \
@ -102,22 +102,22 @@ enum class CharClass : ByteCodeValueType {
}; };
struct CharRange { struct CharRange {
const char from; const u32 from;
const char to; const u32 to;
CharRange(size_t value) CharRange(u64 value)
: from(value >> 8) : from(value >> 32)
, to(value & 0xFF) , to(value & 0xffffffff)
{ {
} }
CharRange(char from, char to) CharRange(u32 from, u32 to)
: from(from) : from(from)
, to(to) , to(to)
{ {
} }
operator ByteCodeValueType() const { return (from << 8) | to; } operator ByteCodeValueType() const { return ((u64)from << 32) | to; }
}; };
struct CompareTypeAndValuePair { struct CompareTypeAndValuePair {
@ -568,8 +568,8 @@ public:
private: private:
ALWAYS_INLINE static void compare_char(const MatchInput& input, MatchState& state, u32 ch1, bool inverse, bool& inverse_matched); ALWAYS_INLINE static void compare_char(const MatchInput& input, MatchState& state, u32 ch1, bool inverse, bool& inverse_matched);
ALWAYS_INLINE static bool compare_string(const MatchInput& input, MatchState& state, const char* str, size_t length); ALWAYS_INLINE static bool compare_string(const MatchInput& input, MatchState& state, const char* str, size_t length);
ALWAYS_INLINE static void compare_character_class(const MatchInput& input, MatchState& state, CharClass character_class, char ch, bool inverse, bool& inverse_matched); ALWAYS_INLINE static void compare_character_class(const MatchInput& input, MatchState& state, CharClass character_class, u32 ch, bool inverse, bool& inverse_matched);
ALWAYS_INLINE static void compare_character_range(const MatchInput& input, MatchState& state, char from, char to, char ch, bool inverse, bool& inverse_matched); ALWAYS_INLINE static void compare_character_range(const MatchInput& input, MatchState& state, u32 from, u32 to, u32 ch, bool inverse, bool& inverse_matched);
}; };
template<typename T> template<typename T>

View file

@ -131,7 +131,7 @@ RegexResult Matcher<Parser>::match(const Vector<RegexStringView> views, Optional
for (auto& view : views) { for (auto& view : views) {
input.view = view; input.view = view;
#ifdef REGEX_DEBUG #ifdef REGEX_DEBUG
dbg() << "[match] Starting match with view (" << view.length() << "): _" << view << "_"; dbg() << "[match] Starting match with view (" << view.length() << "): _" << view.to_string() << "_";
#endif #endif
auto view_length = view.length(); auto view_length = view.length();
@ -171,7 +171,7 @@ RegexResult Matcher<Parser>::match(const Vector<RegexStringView> views, Optional
#ifdef REGEX_DEBUG #ifdef REGEX_DEBUG
dbg() << "state.string_position: " << state.string_position << " view_index: " << view_index; dbg() << "state.string_position: " << state.string_position << " view_index: " << view_index;
dbg() << "[match] Found a match (length = " << state.string_position - view_index << "): " << input.view.substring_view(view_index, state.string_position - view_index); dbg() << "[match] Found a match (length = " << state.string_position - view_index << "): " << input.view.substring_view(view_index, state.string_position - view_index).to_string();
#endif #endif
++match_count; ++match_count;

View file

@ -365,7 +365,7 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_bracket_expression(ByteCode& stack
values.take_last(); // RangeExpressionDummy values.take_last(); // RangeExpressionDummy
auto value1 = values.take_last(); auto value1 = values.take_last();
values.append({ CharacterCompareType::CharRange, static_cast<ByteCodeValueType>(CharRange { (char)value1.value, (char)value2.value }) }); values.append({ CharacterCompareType::CharRange, static_cast<ByteCodeValueType>(CharRange { (u32)value1.value, (u32)value2.value }) });
} }
} }

View file

@ -1,10 +1,9 @@
file(GLOB TEST_SOURCES CONFIGURE_DEPENDS "*.cpp") file(GLOB TEST_SOURCES CONFIGURE_DEPENDS "*.cpp")
file(GLOB REGEX_SOURCES CONFIGURE_DEPENDS "../*.cpp") file(GLOB REGEX_SOURCES CONFIGURE_DEPENDS "../*.cpp" "../C/*.cpp")
file(GLOB C_REGEX_SOURCES CONFIGURE_DEPENDS "../C/*.cpp")
foreach(source ${TEST_SOURCES}) foreach(source ${TEST_SOURCES})
get_filename_component(name ${source} NAME_WE) get_filename_component(name ${source} NAME_WE)
add_executable(${name} ${source} ${REGEX_SOURCES} ${C_REGEX_SOURCES}) add_executable(${name} ${source} ${REGEX_SOURCES})
target_link_libraries(${name} LagomCore) target_link_libraries(${name} LagomCore)
add_test( add_test(
NAME ${name} NAME ${name}