mirror of
https://github.com/RGBCube/serenity
synced 2025-07-25 05:27:43 +00:00
LibRegex: Add RegexStringView wrapper to support utf8 and utf32 views
This commit is contained in:
parent
12dd40a2a5
commit
4a630d4b63
11 changed files with 596 additions and 304 deletions
|
@ -31,11 +31,196 @@
|
|||
#include "AK/FlyString.h"
|
||||
#include "AK/HashMap.h"
|
||||
#include "AK/String.h"
|
||||
#include "AK/StringBuilder.h"
|
||||
#include "AK/StringView.h"
|
||||
#include "AK/Utf32View.h"
|
||||
#include "AK/Vector.h"
|
||||
|
||||
namespace regex {
|
||||
|
||||
class RegexStringView {
|
||||
public:
|
||||
RegexStringView(const char* chars)
|
||||
: m_u8view(chars)
|
||||
{
|
||||
}
|
||||
|
||||
RegexStringView(const String& string)
|
||||
: m_u8view(string)
|
||||
{
|
||||
}
|
||||
|
||||
RegexStringView(const StringView view)
|
||||
: m_u8view(view)
|
||||
{
|
||||
}
|
||||
RegexStringView(const Utf32View view)
|
||||
: m_u32view(view)
|
||||
{
|
||||
}
|
||||
|
||||
bool is_u8_view() const { return m_u8view.has_value(); }
|
||||
bool is_u32_view() const { return m_u32view.has_value(); }
|
||||
|
||||
const StringView& u8view() const
|
||||
{
|
||||
ASSERT(m_u8view.has_value());
|
||||
return m_u8view.value();
|
||||
};
|
||||
|
||||
const Utf32View& u32view() const
|
||||
{
|
||||
ASSERT(m_u32view.has_value());
|
||||
return m_u32view.value();
|
||||
};
|
||||
|
||||
bool is_empty() const
|
||||
{
|
||||
if (is_u8_view())
|
||||
return m_u8view.value().is_empty();
|
||||
else
|
||||
return m_u32view.value().is_empty();
|
||||
}
|
||||
|
||||
bool is_null() const
|
||||
{
|
||||
if (is_u8_view())
|
||||
return m_u8view.value().is_null();
|
||||
else
|
||||
return m_u32view.value().code_points() == nullptr;
|
||||
}
|
||||
|
||||
size_t length() const
|
||||
{
|
||||
if (is_u8_view())
|
||||
return m_u8view.value().length();
|
||||
else
|
||||
return m_u32view.value().length();
|
||||
}
|
||||
|
||||
Vector<RegexStringView> lines() const
|
||||
{
|
||||
if (is_u8_view()) {
|
||||
auto views = u8view().lines(false);
|
||||
Vector<RegexStringView> new_views;
|
||||
for (auto& view : views)
|
||||
new_views.append(move(view));
|
||||
return new_views;
|
||||
}
|
||||
|
||||
// FIXME: line splitting for Utf32View needed
|
||||
Vector<RegexStringView> views;
|
||||
views.append(m_u32view.value());
|
||||
return views;
|
||||
}
|
||||
|
||||
RegexStringView substring_view(size_t offset, size_t length) const
|
||||
{
|
||||
if (is_u8_view()) {
|
||||
return u8view().substring_view(offset, length);
|
||||
}
|
||||
return u32view().substring_view(offset, length);
|
||||
}
|
||||
|
||||
String to_string() const
|
||||
{
|
||||
if (is_u8_view()) {
|
||||
return u8view().to_string();
|
||||
}
|
||||
|
||||
StringBuilder builder;
|
||||
builder.append(u32view());
|
||||
return builder.to_string();
|
||||
}
|
||||
|
||||
u32 operator[](size_t index) const
|
||||
{
|
||||
if (is_u8_view()) {
|
||||
return u8view()[index];
|
||||
}
|
||||
return u32view().code_points()[index];
|
||||
}
|
||||
|
||||
bool operator==(const char* cstring) const
|
||||
{
|
||||
if (is_u8_view())
|
||||
return u8view() == cstring;
|
||||
|
||||
return to_string() == cstring;
|
||||
}
|
||||
|
||||
bool operator!=(const char* cstring) const
|
||||
{
|
||||
return !(*this == cstring);
|
||||
}
|
||||
|
||||
bool operator==(const String& string) const
|
||||
{
|
||||
if (is_u8_view())
|
||||
return u8view() == string;
|
||||
|
||||
return to_string() == string;
|
||||
}
|
||||
|
||||
bool operator==(const StringView& other) const
|
||||
{
|
||||
if (is_u8_view())
|
||||
return u8view() == other;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool operator!=(const StringView& other) const
|
||||
{
|
||||
return !(*this == other);
|
||||
}
|
||||
|
||||
bool operator==(const Utf32View& other) const
|
||||
{
|
||||
if (is_u32_view()) {
|
||||
StringBuilder builder;
|
||||
builder.append(other);
|
||||
return to_string() == builder.to_string();
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
bool operator!=(const Utf32View& other) const
|
||||
{
|
||||
return !(*this == other);
|
||||
}
|
||||
|
||||
const char* characters_without_null_termination() const
|
||||
{
|
||||
if(is_u8_view())
|
||||
return u8view().characters_without_null_termination();
|
||||
|
||||
return to_string().characters(); // FIXME: it contains the null termination, does that actually matter?
|
||||
}
|
||||
|
||||
bool starts_with(const StringView& str) const
|
||||
{
|
||||
if(is_u32_view())
|
||||
return false;
|
||||
return u8view().starts_with(str);
|
||||
}
|
||||
|
||||
bool starts_with(const Utf32View& str) const
|
||||
{
|
||||
if(is_u8_view())
|
||||
return false;
|
||||
|
||||
StringBuilder builder;
|
||||
builder.append(str);
|
||||
return to_string().starts_with(builder.to_string());
|
||||
}
|
||||
|
||||
private:
|
||||
Optional<StringView> m_u8view;
|
||||
Optional<Utf32View> m_u32view;
|
||||
};
|
||||
|
||||
class Match final {
|
||||
private:
|
||||
Optional<FlyString> string;
|
||||
|
@ -44,7 +229,7 @@ public:
|
|||
Match() = default;
|
||||
~Match() = default;
|
||||
|
||||
Match(const StringView view_, const size_t line_, const size_t column_, const size_t global_offset_)
|
||||
Match(const RegexStringView view_, const size_t line_, const size_t column_, const size_t global_offset_)
|
||||
: view(view_)
|
||||
, line(line_)
|
||||
, column(column_)
|
||||
|
@ -63,7 +248,7 @@ public:
|
|||
{
|
||||
}
|
||||
|
||||
StringView view { nullptr };
|
||||
RegexStringView view { nullptr };
|
||||
size_t line { 0 };
|
||||
size_t column { 0 };
|
||||
size_t global_offset { 0 };
|
||||
|
@ -74,14 +259,14 @@ public:
|
|||
};
|
||||
|
||||
struct MatchInput {
|
||||
StringView view { nullptr };
|
||||
RegexStringView view { nullptr };
|
||||
AllOptions regex_options {};
|
||||
|
||||
size_t match_index { 0 };
|
||||
size_t line { 0 };
|
||||
size_t column { 0 };
|
||||
|
||||
size_t global_offset { 0 }; // For multiline matching, knowning the offset from start could be important
|
||||
size_t global_offset { 0 }; // For multiline matching, knowing the offset from start could be important
|
||||
};
|
||||
|
||||
struct MatchState {
|
||||
|
@ -98,3 +283,5 @@ struct MatchOutput {
|
|||
};
|
||||
|
||||
}
|
||||
|
||||
using regex::RegexStringView;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue