1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-25 05:27:43 +00:00

LibRegex: Add RegexStringView wrapper to support utf8 and utf32 views

This commit is contained in:
Emanuel Sprung 2020-06-09 00:15:09 +02:00 committed by Andreas Kling
parent 12dd40a2a5
commit 4a630d4b63
11 changed files with 596 additions and 304 deletions

View file

@ -31,11 +31,196 @@
#include "AK/FlyString.h"
#include "AK/HashMap.h"
#include "AK/String.h"
#include "AK/StringBuilder.h"
#include "AK/StringView.h"
#include "AK/Utf32View.h"
#include "AK/Vector.h"
namespace regex {
// A view over regex input text that wraps either a byte-oriented StringView
// or a Utf32View. Every constructor initializes exactly one of the two
// underlying views, and nothing in the class changes that choice afterwards;
// is_u8_view() / is_u32_view() report which one is present.
class RegexStringView {
public:
    // The constructors are intentionally implicit so that C strings, Strings,
    // StringViews and Utf32Views can be passed wherever a RegexStringView is
    // expected.
    RegexStringView(const char* chars)
        : m_u8view(chars)
    {
    }

    RegexStringView(const String& string)
        : m_u8view(string)
    {
    }

    RegexStringView(const StringView view)
        : m_u8view(view)
    {
    }

    RegexStringView(const Utf32View view)
        : m_u32view(view)
    {
    }

    bool is_u8_view() const { return m_u8view.has_value(); }
    bool is_u32_view() const { return m_u32view.has_value(); }

    // Returns the wrapped StringView; asserts that this is a byte view.
    const StringView& u8view() const
    {
        ASSERT(m_u8view.has_value());
        return m_u8view.value();
    }

    // Returns the wrapped Utf32View; asserts that this is a UTF-32 view.
    const Utf32View& u32view() const
    {
        ASSERT(m_u32view.has_value());
        return m_u32view.value();
    }

    bool is_empty() const
    {
        if (is_u8_view())
            return m_u8view.value().is_empty();
        return m_u32view.value().is_empty();
    }

    // A UTF-32 view counts as null when it has no code point buffer.
    bool is_null() const
    {
        if (is_u8_view())
            return m_u8view.value().is_null();
        return m_u32view.value().code_points() == nullptr;
    }

    // Length as reported by the underlying view.
    size_t length() const
    {
        if (is_u8_view())
            return m_u8view.value().length();
        return m_u32view.value().length();
    }

    // Splits a byte view into lines. A UTF-32 view is currently returned
    // unsplit as a single element.
    Vector<RegexStringView> lines() const
    {
        if (is_u8_view()) {
            auto views = u8view().lines(false);
            Vector<RegexStringView> new_views;
            for (auto& view : views)
                new_views.append(move(view));
            return new_views;
        }

        // FIXME: line splitting for Utf32View needed
        Vector<RegexStringView> views;
        views.append(m_u32view.value());
        return views;
    }

    // Returns a sub-view of the same kind (byte or UTF-32) as this view.
    RegexStringView substring_view(size_t offset, size_t length) const
    {
        if (is_u8_view())
            return u8view().substring_view(offset, length);
        return u32view().substring_view(offset, length);
    }

    // Returns an owning String; UTF-32 views are converted via StringBuilder.
    String to_string() const
    {
        if (is_u8_view())
            return u8view().to_string();

        StringBuilder builder;
        builder.append(u32view());
        return builder.to_string();
    }

    // Returns the element at `index`: a byte for byte views, a code point for
    // UTF-32 views.
    u32 operator[](size_t index) const
    {
        if (is_u8_view()) {
            // Convert through unsigned char first: where plain char is signed,
            // a byte >= 0x80 would otherwise sign-extend to 0xFFFFFFxx.
            return static_cast<u32>(static_cast<unsigned char>(u8view()[index]));
        }
        return u32view().code_points()[index];
    }

    bool operator==(const char* cstring) const
    {
        if (is_u8_view())
            return u8view() == cstring;
        return to_string() == cstring;
    }

    bool operator!=(const char* cstring) const
    {
        return !(*this == cstring);
    }

    bool operator==(const String& string) const
    {
        if (is_u8_view())
            return u8view() == string;
        return to_string() == string;
    }

    // Counterpart of operator==(const String&). Without it, `view != string`
    // would be resolved through the StringView overload, which always reports
    // "not equal" for UTF-32 views.
    bool operator!=(const String& string) const
    {
        return !(*this == string);
    }

    // A UTF-32 view never compares equal to a StringView.
    bool operator==(const StringView& other) const
    {
        if (is_u8_view())
            return u8view() == other;
        return false;
    }

    bool operator!=(const StringView& other) const
    {
        return !(*this == other);
    }

    // A byte view never compares equal to a Utf32View. Two UTF-32 views are
    // compared through their String conversions.
    bool operator==(const Utf32View& other) const
    {
        if (is_u32_view()) {
            StringBuilder builder;
            builder.append(other);
            return to_string() == builder.to_string();
        }
        return false;
    }

    bool operator!=(const Utf32View& other) const
    {
        return !(*this == other);
    }

    // Returns a pointer to the characters of this view.
    //
    // For byte views this is the underlying view's own pointer. For UTF-32
    // views the text is converted to a String that is stored in this object,
    // so the returned pointer stays valid until the next call to this function
    // on the same object, or until the object is assigned to or destroyed.
    const char* characters_without_null_termination() const
    {
        if (is_u8_view())
            return u8view().characters_without_null_termination();

        // Returning to_string().characters() directly would hand out a pointer
        // into a temporary String that is destroyed before the caller can use it.
        // FIXME: it contains the null termination, does that actually matter?
        m_converted_string = to_string();
        return m_converted_string.characters();
    }

    // A UTF-32 view never starts with a StringView prefix.
    bool starts_with(const StringView& str) const
    {
        if (is_u32_view())
            return false;
        return u8view().starts_with(str);
    }

    // A byte view never starts with a Utf32View prefix. UTF-32 views are
    // compared through their String conversions.
    bool starts_with(const Utf32View& str) const
    {
        if (is_u8_view())
            return false;

        StringBuilder builder;
        builder.append(str);
        return to_string().starts_with(builder.to_string());
    }

private:
    Optional<StringView> m_u8view;
    Optional<Utf32View> m_u32view;
    // Backing storage for characters_without_null_termination() on UTF-32
    // views; mutable because that accessor is const.
    mutable String m_converted_string;
};
class Match final {
private:
Optional<FlyString> string;
@ -44,7 +229,7 @@ public:
Match() = default;
~Match() = default;
Match(const StringView view_, const size_t line_, const size_t column_, const size_t global_offset_)
Match(const RegexStringView view_, const size_t line_, const size_t column_, const size_t global_offset_)
: view(view_)
, line(line_)
, column(column_)
@ -63,7 +248,7 @@ public:
{
}
StringView view { nullptr };
RegexStringView view { nullptr };
size_t line { 0 };
size_t column { 0 };
size_t global_offset { 0 };
@ -74,14 +259,14 @@ public:
};
struct MatchInput {
StringView view { nullptr };
RegexStringView view { nullptr };
AllOptions regex_options {};
size_t match_index { 0 };
size_t line { 0 };
size_t column { 0 };
size_t global_offset { 0 }; // For multiline matching, knowning the offset from start could be important
size_t global_offset { 0 }; // For multiline matching, knowing the offset from start could be important
};
struct MatchState {
@ -98,3 +283,5 @@ struct MatchOutput {
};
}
// Expose regex::RegexStringView under its unqualified name in the enclosing scope.
using regex::RegexStringView;