1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-26 08:37:45 +00:00

LibRegex: Implement multiline stateful matches

This commit is contained in:
Ali Mohammad Pur 2021-04-23 03:55:19 +04:30 committed by Linus Groh
parent bb40d4d5ff
commit bf9c04a3da
2 changed files with 69 additions and 2 deletions

View file

@ -155,3 +155,53 @@ test("undefined match result", () => {
r.exec = () => ({}); r.exec = () => ({});
expect(r[Symbol.replace]()).toBe("undefined"); expect(r[Symbol.replace]()).toBe("undefined");
}); });
// Multiline test
test("multiline match", () => {
let reg = /\s*\/\/.*$/gm;
let string = `
(
(?:[a-fA-F\d]{1,4}:){7}(?:[a-fA-F\d]{1,4}|:)| // 1:2:3:4:5:6:7:: 1:2:3:4:5:6:7:8
(?:[a-fA-F\d]{1,4}:){6}(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|:[a-fA-F\d]{1,4}|:)| // 1:2:3:4:5:6:: 1:2:3:4:5:6::8 1:2:3:4:5:6::8 1:2:3:4:5:6::1.2.3.4
(?:[a-fA-F\d]{1,4}:){5}(?::(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,2}|:)| // 1:2:3:4:5:: 1:2:3:4:5::7:8 1:2:3:4:5::8 1:2:3:4:5::7:1.2.3.4
(?:[a-fA-F\d]{1,4}:){4}(?:(:[a-fA-F\d]{1,4}){0,1}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,3}|:)| // 1:2:3:4:: 1:2:3:4::6:7:8 1:2:3:4::8 1:2:3:4::6:7:1.2.3.4
(?:[a-fA-F\d]{1,4}:){3}(?:(:[a-fA-F\d]{1,4}){0,2}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,4}|:)| // 1:2:3:: 1:2:3::5:6:7:8 1:2:3::8 1:2:3::5:6:7:1.2.3.4
(?:[a-fA-F\d]{1,4}:){2}(?:(:[a-fA-F\d]{1,4}){0,3}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,5}|:)| // 1:2:: 1:2::4:5:6:7:8 1:2::8 1:2::4:5:6:7:1.2.3.4
(?:[a-fA-F\d]{1,4}:){1}(?:(:[a-fA-F\d]{1,4}){0,4}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,6}|:)| // 1:: 1::3:4:5:6:7:8 1::8 1::3:4:5:6:7:1.2.3.4
(?::((?::[a-fA-F\d]{1,4}){0,5}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(?::[a-fA-F\d]{1,4}){1,7}|:)) // ::2:3:4:5:6:7:8 ::2:3:4:5:6:7:8 ::8 ::1.2.3.4
)(%[0-9a-zA-Z]{1,})? // %eth0 %1
`;
let res = reg.exec(string);
expect(res.length).toBe(1);
expect(res[0]).toBe(" // 1:2:3:4:5:6:7:: 1:2:3:4:5:6:7:8");
expect(res.index).toBe(46);
});
test("multiline stateful match", () => {
let reg = /\s*\/\/.*$/gm;
let string = `
(
(?:[a-fA-F\d]{1,4}:){7}(?:[a-fA-F\d]{1,4}|:)| // 1:2:3:4:5:6:7:: 1:2:3:4:5:6:7:8
(?:[a-fA-F\d]{1,4}:){6}(?:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|:[a-fA-F\d]{1,4}|:)| // 1:2:3:4:5:6:: 1:2:3:4:5:6::8 1:2:3:4:5:6::8 1:2:3:4:5:6::1.2.3.4
(?:[a-fA-F\d]{1,4}:){5}(?::(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,2}|:)| // 1:2:3:4:5:: 1:2:3:4:5::7:8 1:2:3:4:5::8 1:2:3:4:5::7:1.2.3.4
(?:[a-fA-F\d]{1,4}:){4}(?:(:[a-fA-F\d]{1,4}){0,1}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,3}|:)| // 1:2:3:4:: 1:2:3:4::6:7:8 1:2:3:4::8 1:2:3:4::6:7:1.2.3.4
(?:[a-fA-F\d]{1,4}:){3}(?:(:[a-fA-F\d]{1,4}){0,2}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,4}|:)| // 1:2:3:: 1:2:3::5:6:7:8 1:2:3::8 1:2:3::5:6:7:1.2.3.4
(?:[a-fA-F\d]{1,4}:){2}(?:(:[a-fA-F\d]{1,4}){0,3}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,5}|:)| // 1:2:: 1:2::4:5:6:7:8 1:2::8 1:2::4:5:6:7:1.2.3.4
(?:[a-fA-F\d]{1,4}:){1}(?:(:[a-fA-F\d]{1,4}){0,4}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(:[a-fA-F\d]{1,4}){1,6}|:)| // 1:: 1::3:4:5:6:7:8 1::8 1::3:4:5:6:7:1.2.3.4
(?::((?::[a-fA-F\d]{1,4}){0,5}:(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]\d|\d)){3}|(?::[a-fA-F\d]{1,4}){1,7}|:)) // ::2:3:4:5:6:7:8 ::2:3:4:5:6:7:8 ::8 ::1.2.3.4
)(%[0-9a-zA-Z]{1,})? // %eth0 %1
`;
let res = reg.exec(string);
expect(res.length).toBe(1);
expect(res[0]).toBe(" // 1:2:3:4:5:6:7:: 1:2:3:4:5:6:7:8");
expect(res.index).toBe(46);
res = reg.exec(string);
expect(res.length).toBe(1);
expect(res[0]).toBe(
" // 1:2:3:4:5:6:: 1:2:3:4:5:6::8 1:2:3:4:5:6::8 1:2:3:4:5:6::1.2.3.4"
);
expect(res.index).toBe(231);
});

View file

@ -82,9 +82,21 @@ RegexResult Matcher<Parser>::match(const Vector<RegexStringView> views, Optional
input.regex_options = m_regex_options | regex_options.value_or({}).value(); input.regex_options = m_regex_options | regex_options.value_or({}).value();
input.start_offset = m_pattern.start_offset; input.start_offset = m_pattern.start_offset;
output.operations = 0; output.operations = 0;
size_t lines_to_skip = 0;
if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful)) if (input.regex_options.has_flag_set(AllFlags::Internal_Stateful)) {
VERIFY(views.size() == 1); if (views.size() > 1 && input.start_offset > views.first().length()) {
dbgln("Started with start={}, goff={}, skip={}", input.start_offset, input.global_offset, lines_to_skip);
for (auto& view : views) {
if (input.start_offset < view.length() + 1)
break;
++lines_to_skip;
input.start_offset -= view.length() + 1;
input.global_offset += view.length() + 1;
}
dbgln("Ended with start={}, goff={}, skip={}", input.start_offset, input.global_offset, lines_to_skip);
}
}
if (c_match_preallocation_count) { if (c_match_preallocation_count) {
output.matches.ensure_capacity(c_match_preallocation_count); output.matches.ensure_capacity(c_match_preallocation_count);
@ -127,6 +139,11 @@ RegexResult Matcher<Parser>::match(const Vector<RegexStringView> views, Optional
continue_search = false; continue_search = false;
for (auto& view : views) { for (auto& view : views) {
if (lines_to_skip != 0) {
++input.line;
--lines_to_skip;
continue;
}
input.view = view; input.view = view;
dbgln_if(REGEX_DEBUG, "[match] Starting match with view ({}): _{}_", view.length(), view); dbgln_if(REGEX_DEBUG, "[match] Starting match with view ({}): _{}_", view.length(), view);