mirror of
https://github.com/RGBCube/serenity
synced 2025-07-27 20:17:44 +00:00
LibJS: Hook up Regex<ECMA262> to RegExpObject and implement `test()'
This makes RegExpObject compile and store a Regex<ECMA262>, adds all flag-related properties, and implements `RegExpPrototype.test()` (complete with 'lastIndex' support) :^) It should be noted that this only implements `test()' using the builtin `exec()'.
This commit is contained in:
parent
75081b2bdd
commit
8ba273a2f3
13 changed files with 396 additions and 12 deletions
|
@ -261,6 +261,7 @@ public:
|
|||
struct MatchInput {
|
||||
RegexStringView view { nullptr };
|
||||
AllOptions regex_options {};
|
||||
size_t start_offset { 0 }; // For Stateful matches, saved and restored from Regex::start_offset.
|
||||
|
||||
size_t match_index { 0 };
|
||||
size_t line { 0 };
|
||||
|
|
|
@ -52,6 +52,15 @@ Regex<Parser>::Regex(StringView pattern, typename ParserTraits<Parser>::OptionsT
|
|||
}
|
||||
}
|
||||
|
||||
template<class Parser>
|
||||
typename ParserTraits<Parser>::OptionsType Regex<Parser>::options() const
|
||||
{
|
||||
if (parser_result.error != Error::NoError)
|
||||
return {};
|
||||
|
||||
return matcher->options();
|
||||
}
|
||||
|
||||
template<class Parser>
|
||||
String Regex<Parser>::error_string(Optional<String> message) const
|
||||
{
|
||||
|
@ -81,6 +90,10 @@ RegexResult Matcher<Parser>::match(const RegexStringView& view, Optional<typenam
|
|||
template<typename Parser>
|
||||
RegexResult Matcher<Parser>::match(const Vector<RegexStringView> views, Optional<typename ParserTraits<Parser>::OptionsType> regex_options) const
|
||||
{
|
||||
// If the pattern *itself* isn't stateful, reset any changes to start_offset.
|
||||
if (!((AllFlags)m_regex_options.value() & AllFlags::Internal_Stateful))
|
||||
m_pattern.start_offset = 0;
|
||||
|
||||
size_t match_count { 0 };
|
||||
|
||||
MatchInput input;
|
||||
|
@ -88,8 +101,12 @@ RegexResult Matcher<Parser>::match(const Vector<RegexStringView> views, Optional
|
|||
MatchOutput output;
|
||||
|
||||
input.regex_options = m_regex_options | regex_options.value_or({}).value();
|
||||
input.start_offset = m_pattern.start_offset;
|
||||
output.operations = 0;
|
||||
|
||||
if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful))
|
||||
ASSERT(views.size() == 1);
|
||||
|
||||
if (c_match_preallocation_count) {
|
||||
output.matches.ensure_capacity(c_match_preallocation_count);
|
||||
output.capture_group_matches.ensure_capacity(c_match_preallocation_count);
|
||||
|
@ -115,7 +132,7 @@ RegexResult Matcher<Parser>::match(const Vector<RegexStringView> views, Optional
|
|||
output.matches.empend();
|
||||
|
||||
ASSERT(start_position + state.string_position - start_position <= input.view.length());
|
||||
if (input.regex_options & AllFlags::StringCopyMatches) {
|
||||
if (input.regex_options.has_flag_set(AllFlags::StringCopyMatches)) {
|
||||
output.matches.at(input.match_index) = { input.view.substring_view(start_position, state.string_position - start_position).to_string(), input.line, start_position, input.global_offset + start_position };
|
||||
} else { // let the view point to the original string ...
|
||||
output.matches.at(input.match_index) = { input.view.substring_view(start_position, state.string_position - start_position), input.line, start_position, input.global_offset + start_position };
|
||||
|
@ -126,7 +143,9 @@ RegexResult Matcher<Parser>::match(const Vector<RegexStringView> views, Optional
|
|||
s_regex_dbg.print_header();
|
||||
#endif
|
||||
|
||||
bool continue_search = (input.regex_options & AllFlags::Global) || (input.regex_options & AllFlags::Multiline);
|
||||
bool continue_search = input.regex_options.has_flag_set(AllFlags::Global) || input.regex_options.has_flag_set(AllFlags::Multiline);
|
||||
if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful))
|
||||
continue_search = false;
|
||||
|
||||
for (auto& view : views) {
|
||||
input.view = view;
|
||||
|
@ -135,7 +154,9 @@ RegexResult Matcher<Parser>::match(const Vector<RegexStringView> views, Optional
|
|||
#endif
|
||||
|
||||
auto view_length = view.length();
|
||||
for (size_t view_index = 0; view_index < view_length; ++view_index) {
|
||||
size_t view_index = m_pattern.start_offset;
|
||||
state.string_position = view_index;
|
||||
for (; view_index < view_length; ++view_index) {
|
||||
auto& match_length_minimum = m_pattern.parser_result.match_length_minimum;
|
||||
// FIXME: More performant would be to know the remaining minimum string
|
||||
// length needed to match from the current position onwards within
|
||||
|
@ -158,12 +179,12 @@ RegexResult Matcher<Parser>::match(const Vector<RegexStringView> views, Optional
|
|||
|
||||
if (success.value()) {
|
||||
|
||||
if ((input.regex_options & AllFlags::MatchNotEndOfLine) && state.string_position == input.view.length()) {
|
||||
if (input.regex_options.has_flag_set(AllFlags::MatchNotEndOfLine) && state.string_position == input.view.length()) {
|
||||
if (!continue_search)
|
||||
break;
|
||||
continue;
|
||||
}
|
||||
if ((input.regex_options & AllFlags::MatchNotBeginOfLine) && view_index == 0) {
|
||||
if (input.regex_options.has_flag_set(AllFlags::MatchNotBeginOfLine) && view_index == 0) {
|
||||
if (!continue_search)
|
||||
break;
|
||||
continue;
|
||||
|
@ -182,26 +203,34 @@ RegexResult Matcher<Parser>::match(const Vector<RegexStringView> views, Optional
|
|||
view_index = state.string_position - (has_zero_length ? 0 : 1);
|
||||
continue;
|
||||
|
||||
} else if (!continue_search && state.string_position < view_length)
|
||||
} else if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful)) {
|
||||
append_match(input, state, output, view_index);
|
||||
break;
|
||||
|
||||
} else if (state.string_position < view_length) {
|
||||
return { false, 0, {}, {}, {}, output.operations };
|
||||
}
|
||||
|
||||
append_match(input, state, output, view_index);
|
||||
break;
|
||||
}
|
||||
|
||||
if (!continue_search)
|
||||
if (!continue_search && !input.regex_options.has_flag_set(AllFlags::Internal_Stateful))
|
||||
break;
|
||||
}
|
||||
|
||||
++input.line;
|
||||
input.global_offset += view.length() + 1; // +1 includes the line break character
|
||||
|
||||
if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful))
|
||||
m_pattern.start_offset = state.string_position;
|
||||
}
|
||||
|
||||
MatchOutput output_copy;
|
||||
if (match_count) {
|
||||
auto capture_groups_count = min(output.capture_group_matches.size(), output.matches.size());
|
||||
for (size_t i = 0; i < capture_groups_count; ++i) {
|
||||
if (input.regex_options & AllFlags::SkipTrimEmptyMatches) {
|
||||
if (input.regex_options.has_flag_set(AllFlags::SkipTrimEmptyMatches)) {
|
||||
output_copy.capture_group_matches.append(output.capture_group_matches.at(i));
|
||||
} else {
|
||||
Vector<Match> capture_group_matches;
|
||||
|
|
|
@ -73,6 +73,11 @@ public:
|
|||
RegexResult match(const RegexStringView&, Optional<typename ParserTraits<Parser>::OptionsType> = {}) const;
|
||||
RegexResult match(const Vector<RegexStringView>, Optional<typename ParserTraits<Parser>::OptionsType> = {}) const;
|
||||
|
||||
typename ParserTraits<Parser>::OptionsType options() const
|
||||
{
|
||||
return m_regex_options;
|
||||
}
|
||||
|
||||
private:
|
||||
Optional<bool> execute(const MatchInput& input, MatchState& state, MatchOutput& output, size_t recursion_level) const;
|
||||
ALWAYS_INLINE Optional<bool> execute_low_prio_forks(const MatchInput& input, MatchState& original_state, MatchOutput& output, Vector<MatchState> states, size_t recursion_level) const;
|
||||
|
@ -87,10 +92,12 @@ public:
|
|||
String pattern_value;
|
||||
regex::Parser::Result parser_result;
|
||||
OwnPtr<Matcher<Parser>> matcher { nullptr };
|
||||
mutable size_t start_offset { 0 };
|
||||
|
||||
explicit Regex(StringView pattern, typename ParserTraits<Parser>::OptionsType regex_options = {});
|
||||
~Regex() = default;
|
||||
|
||||
typename ParserTraits<Parser>::OptionsType options() const;
|
||||
void print_bytecode(FILE* f = stdout) const;
|
||||
String error_string(Optional<String> message = {}) const;
|
||||
|
||||
|
|
|
@ -53,7 +53,8 @@ enum class AllFlags {
|
|||
Sticky = __Regex_Sticky, // Force the pattern to only match consecutive matches from where the previous match ended.
|
||||
Multiline = __Regex_Multiline, // Handle newline characters. Match each line, one by one.
|
||||
SkipTrimEmptyMatches = __Regex_SkipTrimEmptyMatches, // Do not remove empty capture group results.
|
||||
Last = SkipTrimEmptyMatches
|
||||
Internal_Stateful = __Regex_Internal_Stateful, // Make global matches match one result at a time, and further match() calls on the same instance continue where the previous one left off.
|
||||
Last = Internal_Stateful,
|
||||
};
|
||||
|
||||
enum class PosixFlags : FlagsUnderlyingType {
|
||||
|
@ -72,7 +73,7 @@ enum class PosixFlags : FlagsUnderlyingType {
|
|||
};
|
||||
|
||||
enum class ECMAScriptFlags : FlagsUnderlyingType {
|
||||
Global = (FlagsUnderlyingType)AllFlags::Global,
|
||||
Global = (FlagsUnderlyingType)AllFlags::Global | (FlagsUnderlyingType)AllFlags::Internal_Stateful, // Note: ECMAScript "Global" creates a stateful regex.
|
||||
Insensitive = (FlagsUnderlyingType)AllFlags::Insensitive,
|
||||
Ungreedy = (FlagsUnderlyingType)AllFlags::Ungreedy,
|
||||
Unicode = (FlagsUnderlyingType)AllFlags::Unicode,
|
||||
|
@ -123,7 +124,7 @@ public:
|
|||
void reset_flags() { m_flags = (T)0; }
|
||||
void reset_flag(T flag) { m_flags = (T)((FlagsUnderlyingType)m_flags & ~(FlagsUnderlyingType)flag); }
|
||||
void set_flag(T flag) { *this |= flag; }
|
||||
bool has_flag_set(T flag) const { return *this & flag; }
|
||||
bool has_flag_set(T flag) const { return (FlagsUnderlyingType)flag == ((FlagsUnderlyingType)m_flags & (FlagsUnderlyingType)flag); }
|
||||
T value() const { return m_flags; }
|
||||
|
||||
private:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue