mirror of
https://github.com/RGBCube/serenity
synced 2025-07-25 18:57:45 +00:00
LibRegex: Track string position in both code units and code points
In non-Unicode mode, the existing MatchState::string_position is tracked in code units; in Unicode mode, it is tracked in code points. In order for some RegexStringView operations to be performant, it is useful for the MatchState to have a field to always track the position in code units. This will allow RegexStringView methods (e.g. operator[]) to perform lookups based on code unit offsets, rather than needing to iterate over the entire string to find a code point offset.
This commit is contained in:
parent
dae7674ca9
commit
27d555bab0
3 changed files with 95 additions and 41 deletions
|
@@ -198,6 +198,7 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
|
|||
auto view_length = view.length();
|
||||
size_t view_index = m_pattern->start_offset;
|
||||
state.string_position = view_index;
|
||||
+ state.string_position_in_code_units = view_index;
|
||||
bool succeeded = false;
|
||||
|
||||
if (view_index == view_length && m_pattern->parser_result.match_length_minimum == 0) {
|
||||
|
@@ -210,6 +211,7 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
|
|||
input.match_index = match_count;
|
||||
|
||||
state.string_position = view_index;
|
||||
+ state.string_position_in_code_units = view_index;
|
||||
state.instruction_position = 0;
|
||||
|
||||
auto success = execute(input, state, temp_output);
|
||||
|
@@ -241,6 +243,7 @@ RegexResult Matcher<Parser>::match(Vector<RegexStringView> const& views, Optiona
|
|||
input.match_index = match_count;
|
||||
|
||||
state.string_position = view_index;
|
||||
+ state.string_position_in_code_units = view_index;
|
||||
state.instruction_position = 0;
|
||||
|
||||
auto success = execute(input, state, output);
|
||||
|
@@ -388,7 +391,7 @@ private:
|
|||
Node* previous { nullptr };
|
||||
};
|
||||
|
||||
- UniformBumpAllocator<Node, true> m_allocator;
|
||||
+ UniformBumpAllocator<Node, true, 8 * MiB> m_allocator;
|
||||
Node* m_first { nullptr };
|
||||
Node* m_last { nullptr };
|
||||
};
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue