mirror of
https://github.com/RGBCube/serenity
synced 2025-05-31 04:58:13 +00:00
LibRegex: Change bytecode value type to a 64-bit value
This allows Unicode ranges to be stored compactly. The wider type is not utilised at the moment, but changing it later would have been significantly more difficult. This commit also fixes a few debug logs.
This commit is contained in:
parent
92ea9ed4a5
commit
3db8ced4c7
5 changed files with 19 additions and 20 deletions
|
@ -275,7 +275,7 @@ ALWAYS_INLINE ExecutionResult OpCode_SaveRightNamedCaptureGroup::execute(const M
|
||||||
auto& map = output.named_capture_group_matches.at(input.match_index);
|
auto& map = output.named_capture_group_matches.at(input.match_index);
|
||||||
|
|
||||||
#ifdef REGEX_DEBUG
|
#ifdef REGEX_DEBUG
|
||||||
ASSERT(start_position + length < input.view.length());
|
ASSERT(start_position + length <= input.view.length());
|
||||||
dbg() << "Save named capture group with name=" << capture_group_name << " and content: " << input.view.substring_view(start_position, length).to_string();
|
dbg() << "Save named capture group with name=" << capture_group_name << " and content: " << input.view.substring_view(start_position, length).to_string();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -415,7 +415,7 @@ ALWAYS_INLINE bool OpCode_Compare::compare_string(const MatchInput& input, Match
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
ALWAYS_INLINE void OpCode_Compare::compare_character_class(const MatchInput& input, MatchState& state, CharClass character_class, char ch, bool inverse, bool& inverse_matched)
|
ALWAYS_INLINE void OpCode_Compare::compare_character_class(const MatchInput& input, MatchState& state, CharClass character_class, u32 ch, bool inverse, bool& inverse_matched)
|
||||||
{
|
{
|
||||||
switch (character_class) {
|
switch (character_class) {
|
||||||
case CharClass::Alnum:
|
case CharClass::Alnum:
|
||||||
|
@ -513,7 +513,7 @@ ALWAYS_INLINE void OpCode_Compare::compare_character_class(const MatchInput& inp
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ALWAYS_INLINE void OpCode_Compare::compare_character_range(const MatchInput& input, MatchState& state, char from, char to, char ch, bool inverse, bool& inverse_matched)
|
ALWAYS_INLINE void OpCode_Compare::compare_character_range(const MatchInput& input, MatchState& state, u32 from, u32 to, u32 ch, bool inverse, bool& inverse_matched)
|
||||||
{
|
{
|
||||||
if (input.regex_options & AllFlags::Insensitive) {
|
if (input.regex_options & AllFlags::Insensitive) {
|
||||||
from = tolower(from);
|
from = tolower(from);
|
||||||
|
@ -553,7 +553,7 @@ const Vector<String> OpCode_Compare::variable_arguments_to_string(Optional<Match
|
||||||
} else if (compare_type == CharacterCompareType::String) {
|
} else if (compare_type == CharacterCompareType::String) {
|
||||||
char* str = reinterpret_cast<char*>(m_bytecode->at(offset++));
|
char* str = reinterpret_cast<char*>(m_bytecode->at(offset++));
|
||||||
auto& length = m_bytecode->at(offset++);
|
auto& length = m_bytecode->at(offset++);
|
||||||
result.empend(String::format("value=\"%s\"", String { str, length }.characters()));
|
result.empend(String::format("value=\"%.*s\"", length, str));
|
||||||
if (!view.is_null())
|
if (!view.is_null())
|
||||||
result.empend(String::format("compare against: \"%s\"", input.value().view.substring_view(state().string_position, state().string_position + length > view.length() ? 0 : length).to_string().characters()));
|
result.empend(String::format("compare against: \"%s\"", input.value().view.substring_view(state().string_position, state().string_position + length > view.length() ? 0 : length).to_string().characters()));
|
||||||
} else if (compare_type == CharacterCompareType::CharClass) {
|
} else if (compare_type == CharacterCompareType::CharClass) {
|
||||||
|
|
|
@ -39,7 +39,7 @@
|
||||||
|
|
||||||
namespace regex {
|
namespace regex {
|
||||||
|
|
||||||
using ByteCodeValueType = size_t;
|
using ByteCodeValueType = u64;
|
||||||
|
|
||||||
#define ENUMERATE_OPCODES \
|
#define ENUMERATE_OPCODES \
|
||||||
__ENUMERATE_OPCODE(Compare) \
|
__ENUMERATE_OPCODE(Compare) \
|
||||||
|
@ -102,22 +102,22 @@ enum class CharClass : ByteCodeValueType {
|
||||||
};
|
};
|
||||||
|
|
||||||
struct CharRange {
|
struct CharRange {
|
||||||
const char from;
|
const u32 from;
|
||||||
const char to;
|
const u32 to;
|
||||||
|
|
||||||
CharRange(size_t value)
|
CharRange(u64 value)
|
||||||
: from(value >> 8)
|
: from(value >> 32)
|
||||||
, to(value & 0xFF)
|
, to(value & 0xffffffff)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
CharRange(char from, char to)
|
CharRange(u32 from, u32 to)
|
||||||
: from(from)
|
: from(from)
|
||||||
, to(to)
|
, to(to)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
operator ByteCodeValueType() const { return (from << 8) | to; }
|
operator ByteCodeValueType() const { return ((u64)from << 32) | to; }
|
||||||
};
|
};
|
||||||
|
|
||||||
struct CompareTypeAndValuePair {
|
struct CompareTypeAndValuePair {
|
||||||
|
@ -568,8 +568,8 @@ public:
|
||||||
private:
|
private:
|
||||||
ALWAYS_INLINE static void compare_char(const MatchInput& input, MatchState& state, u32 ch1, bool inverse, bool& inverse_matched);
|
ALWAYS_INLINE static void compare_char(const MatchInput& input, MatchState& state, u32 ch1, bool inverse, bool& inverse_matched);
|
||||||
ALWAYS_INLINE static bool compare_string(const MatchInput& input, MatchState& state, const char* str, size_t length);
|
ALWAYS_INLINE static bool compare_string(const MatchInput& input, MatchState& state, const char* str, size_t length);
|
||||||
ALWAYS_INLINE static void compare_character_class(const MatchInput& input, MatchState& state, CharClass character_class, char ch, bool inverse, bool& inverse_matched);
|
ALWAYS_INLINE static void compare_character_class(const MatchInput& input, MatchState& state, CharClass character_class, u32 ch, bool inverse, bool& inverse_matched);
|
||||||
ALWAYS_INLINE static void compare_character_range(const MatchInput& input, MatchState& state, char from, char to, char ch, bool inverse, bool& inverse_matched);
|
ALWAYS_INLINE static void compare_character_range(const MatchInput& input, MatchState& state, u32 from, u32 to, u32 ch, bool inverse, bool& inverse_matched);
|
||||||
};
|
};
|
||||||
|
|
||||||
template<typename T>
|
template<typename T>
|
||||||
|
|
|
@ -131,7 +131,7 @@ RegexResult Matcher<Parser>::match(const Vector<RegexStringView> views, Optional
|
||||||
for (auto& view : views) {
|
for (auto& view : views) {
|
||||||
input.view = view;
|
input.view = view;
|
||||||
#ifdef REGEX_DEBUG
|
#ifdef REGEX_DEBUG
|
||||||
dbg() << "[match] Starting match with view (" << view.length() << "): _" << view << "_";
|
dbg() << "[match] Starting match with view (" << view.length() << "): _" << view.to_string() << "_";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
auto view_length = view.length();
|
auto view_length = view.length();
|
||||||
|
@ -171,7 +171,7 @@ RegexResult Matcher<Parser>::match(const Vector<RegexStringView> views, Optional
|
||||||
|
|
||||||
#ifdef REGEX_DEBUG
|
#ifdef REGEX_DEBUG
|
||||||
dbg() << "state.string_position: " << state.string_position << " view_index: " << view_index;
|
dbg() << "state.string_position: " << state.string_position << " view_index: " << view_index;
|
||||||
dbg() << "[match] Found a match (length = " << state.string_position - view_index << "): " << input.view.substring_view(view_index, state.string_position - view_index);
|
dbg() << "[match] Found a match (length = " << state.string_position - view_index << "): " << input.view.substring_view(view_index, state.string_position - view_index).to_string();
|
||||||
#endif
|
#endif
|
||||||
++match_count;
|
++match_count;
|
||||||
|
|
||||||
|
|
|
@ -365,7 +365,7 @@ ALWAYS_INLINE bool PosixExtendedParser::parse_bracket_expression(ByteCode& stack
|
||||||
values.take_last(); // RangeExpressionDummy
|
values.take_last(); // RangeExpressionDummy
|
||||||
auto value1 = values.take_last();
|
auto value1 = values.take_last();
|
||||||
|
|
||||||
values.append({ CharacterCompareType::CharRange, static_cast<ByteCodeValueType>(CharRange { (char)value1.value, (char)value2.value }) });
|
values.append({ CharacterCompareType::CharRange, static_cast<ByteCodeValueType>(CharRange { (u32)value1.value, (u32)value2.value }) });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,10 +1,9 @@
|
||||||
file(GLOB TEST_SOURCES CONFIGURE_DEPENDS "*.cpp")
|
file(GLOB TEST_SOURCES CONFIGURE_DEPENDS "*.cpp")
|
||||||
file(GLOB REGEX_SOURCES CONFIGURE_DEPENDS "../*.cpp")
|
file(GLOB REGEX_SOURCES CONFIGURE_DEPENDS "../*.cpp" "../C/*.cpp")
|
||||||
file(GLOB C_REGEX_SOURCES CONFIGURE_DEPENDS "../C/*.cpp")
|
|
||||||
|
|
||||||
foreach(source ${TEST_SOURCES})
|
foreach(source ${TEST_SOURCES})
|
||||||
get_filename_component(name ${source} NAME_WE)
|
get_filename_component(name ${source} NAME_WE)
|
||||||
add_executable(${name} ${source} ${REGEX_SOURCES} ${C_REGEX_SOURCES})
|
add_executable(${name} ${source} ${REGEX_SOURCES})
|
||||||
target_link_libraries(${name} LagomCore)
|
target_link_libraries(${name} LagomCore)
|
||||||
add_test(
|
add_test(
|
||||||
NAME ${name}
|
NAME ${name}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue