From f12c98b29f4ae8a63ddb5d9f33ba3ad41ade2c7c Mon Sep 17 00:00:00 2001
From: AnotherTest
Date: Thu, 3 Dec 2020 18:55:36 +0330
Subject: [PATCH] LibRegex: Add a basic Regex<...>::replace()

---
 Libraries/LibRegex/RegexMatcher.h  | 48 ++++++++++++++++++++++++++++++
 Libraries/LibRegex/Tests/Regex.cpp | 34 +++++++++++++++++++++
 2 files changed, 82 insertions(+)

diff --git a/Libraries/LibRegex/RegexMatcher.h b/Libraries/LibRegex/RegexMatcher.h
index 4611ef278a..b62dab0be7 100644
--- a/Libraries/LibRegex/RegexMatcher.h
+++ b/Libraries/LibRegex/RegexMatcher.h
@@ -37,6 +37,7 @@
 #include
 #include
 #include
+#include <AK/GenericLexer.h>
 
 #include
 
@@ -115,6 +116,53 @@ public:
         return matcher->match(views, regex_options);
     }
 
+    // Runs the matcher over `view` and returns a copy of it in which every reported match is replaced
+    // by the expansion of `replacement_pattern`; text between matches is copied through unchanged.
+    // In the pattern, "\\" yields one backslash and "\N" (N >= 1) yields capture group N of the current
+    // match; a backslash followed by anything else (including "\0" or a too-large N) is emitted verbatim.
+    // Returns {} if there is no matcher or the regex failed to parse, and `view` itself if nothing matched.
+    String replace(const RegexStringView view, const StringView& replacement_pattern, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
+    {
+        if (!matcher || parser_result.error != Error::NoError)
+            return {};
+
+        StringBuilder builder;
+        size_t start_offset = 0;
+        RegexResult result = matcher->match(view, regex_options);
+        if (!result.success)
+            return view.to_string();
+
+        for (size_t i = 0; i < result.matches.size(); ++i) {
+            auto& match = result.matches[i];
+            builder.append(view.substring_view(start_offset, match.global_offset - start_offset).to_string());
+            start_offset = match.global_offset + match.view.length();
+            GenericLexer lexer(replacement_pattern);
+            while (!lexer.is_eof()) {
+                if (lexer.consume_specific('\\')) {
+                    if (lexer.consume_specific('\\')) {
+                        builder.append('\\');
+                        continue;
+                    }
+                    auto number = lexer.consume_while(isdigit);
+                    // Group references are 1-based: rejecting 0 keeps `index - 1` below from wrapping around.
+                    if (auto index = number.to_uint(); index.has_value() && index.value() > 0 && result.n_capture_groups >= index.value()) {
+                        builder.append(result.capture_group_matches[i][index.value() - 1].view.to_string());
+                    } else {
+                        builder.appendff("\\{}", number);
+                    }
+                } else {
+                    builder.append(lexer.consume_while([](auto ch) { return ch != '\\'; }));
+                }
+            }
+        }
+
+        builder.append(view.substring_view(start_offset, view.length() - start_offset).to_string());
+
+        return builder.to_string();
+    }
+
+    // FIXME: replace(const Vector<RegexStringView>, ...)
+
     RegexResult search(const RegexStringView view, Optional<typename ParserTraits<Parser>::OptionsType> regex_options = {}) const
     {
         if (!matcher || parser_result.error != Error::NoError)
diff --git a/Libraries/LibRegex/Tests/Regex.cpp b/Libraries/LibRegex/Tests/Regex.cpp
index d9c5f7ce31..e4fb05c3c4 100644
--- a/Libraries/LibRegex/Tests/Regex.cpp
+++ b/Libraries/LibRegex/Tests/Regex.cpp
@@ -563,4 +563,38 @@ TEST_CASE(ECMA262_match)
     }
 }
 
+TEST_CASE(replace)
+{
+    struct _test {
+        const char* pattern;
+        const char* replacement;
+        const char* subject;
+        const char* expected;
+        ECMAScriptFlags options {};
+    };
+
+    constexpr _test tests[] {
+        { "foo(.+)", "aaa", "test", "test" },
+        { "foo(.+)", "test\\1", "foobar", "testbar" },
+        { "foo(.+)", "\\2\\1", "foobar", "\\2bar" },
+        { "foo(.+)", "\\\\\\1", "foobar", "\\bar" },
+        { "foo(.+)", "\\0", "foobar", "\\0" },
+        { "foo(.)", "a\\1", "fooxfooy", "axay", ECMAScriptFlags::Multiline },
+    };
+
+    for (auto& test : tests) {
+        Regex<ECMA262> re(test.pattern, test.options);
+#ifdef REGEX_DEBUG
+        dbg() << "\n";
+        RegexDebug regex_dbg(stderr);
+        regex_dbg.print_raw_bytecode(re);
+        regex_dbg.print_header();
+        regex_dbg.print_bytecode(re);
+        dbg() << "\n";
+#endif
+        EXPECT_EQ(re.parser_result.error, Error::NoError);
+        EXPECT_EQ(re.replace(test.subject, test.replacement), test.expected);
+    }
+}
+
 TEST_MAIN(Regex)