mirror of
https://github.com/RGBCube/serenity
synced 2025-05-14 08:54:58 +00:00
AK+LibXML+JSSpecCompiler: Move LineTrackingLexer to AK
This is a simple extension of GenericLexer, and is used in more than just LibXML, so let's move it into AK. The move also resolves a FIXME, which is removed in this commit.
This commit is contained in:
parent
08c02ad888
commit
bc301b6f40
8 changed files with 79 additions and 78 deletions
|
@ -176,6 +176,31 @@ ErrorOr<T> GenericLexer::consume_decimal_integer()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns the position (offset, line, column) corresponding to `index` in the input.
//
// The result is computed relative to m_cached_position, which is updated in place
// (it is declared mutable) and then returned by value. Moving forward walks the
// bytes between the cached index and `index`; moving backward recounts the newlines
// that were skipped over.
LineTrackingLexer::Position LineTrackingLexer::position_for(size_t index) const
{
    auto& [cached_index, cached_line, cached_column] = m_cached_position;

    if (cached_index <= index) {
        // Forward: every '\n' starts a new line at column 0, any other byte advances the column.
        for (size_t i = cached_index; i < index; ++i) {
            if (m_input[i] == '\n')
                ++cached_line, cached_column = 0;
            else
                ++cached_column;
        }
    } else {
        // Backward: count the newlines in [index, cached_index) to find how many lines we moved up.
        auto lines_backtracked = m_input.substring_view(index, cached_index - index).count('\n');
        cached_line -= lines_backtracked;
        if (lines_backtracked == 0) {
            // Still on the same line, so the column shrinks by the distance moved.
            cached_column -= cached_index - index;
        } else {
            // We landed on an earlier line; recompute the column from that line's start.
            // The forward scan gives the byte right after a '\n' column 0, so the column is
            // the distance from the byte *following* the previous newline, not from the
            // newline itself.
            auto previous_newline = m_input.substring_view(0, index).find_last('\n');
            if (previous_newline.has_value()) {
                cached_column = index - previous_newline.value() - 1;
            } else {
                // No newline before `index`: the line starts at the beginning of the input.
                // NOTE(review): this does not add back a non-zero starting column supplied
                // through the (StringView, Position) constructor - confirm whether any caller
                // backtracks onto the first line of such a lexer.
                cached_column = index;
            }
        }
    }

    cached_index = index;
    return m_cached_position;
}
|
||||||
|
|
||||||
template ErrorOr<u8> GenericLexer::consume_decimal_integer<u8>();
|
template ErrorOr<u8> GenericLexer::consume_decimal_integer<u8>();
|
||||||
template ErrorOr<i8> GenericLexer::consume_decimal_integer<i8>();
|
template ErrorOr<i8> GenericLexer::consume_decimal_integer<i8>();
|
||||||
template ErrorOr<u16> GenericLexer::consume_decimal_integer<u16>();
|
template ErrorOr<u16> GenericLexer::consume_decimal_integer<u16>();
|
||||||
|
|
|
@ -234,6 +234,34 @@ private:
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class LineTrackingLexer : public GenericLexer {
|
||||||
|
public:
|
||||||
|
using GenericLexer::GenericLexer;
|
||||||
|
|
||||||
|
struct Position {
|
||||||
|
size_t offset { 0 };
|
||||||
|
size_t line { 0 };
|
||||||
|
size_t column { 0 };
|
||||||
|
};
|
||||||
|
|
||||||
|
LineTrackingLexer(StringView input, Position start_position)
|
||||||
|
: GenericLexer(input)
|
||||||
|
, m_cached_position {
|
||||||
|
.line = start_position.line,
|
||||||
|
.column = start_position.column,
|
||||||
|
}
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
Position cached_position() const { return m_cached_position; }
|
||||||
|
void restore_cached_offset(Position cached_position) { m_cached_position = cached_position; }
|
||||||
|
Position position_for(size_t) const;
|
||||||
|
Position current_position() const { return position_for(m_index); }
|
||||||
|
|
||||||
|
protected:
|
||||||
|
mutable Position m_cached_position;
|
||||||
|
};
|
||||||
|
|
||||||
constexpr auto is_any_of(StringView values)
|
constexpr auto is_any_of(StringView values)
|
||||||
{
|
{
|
||||||
return [values](auto c) { return values.contains(c); };
|
return [values](auto c) { return values.contains(c); };
|
||||||
|
@ -254,4 +282,5 @@ using AK::GenericLexer;
|
||||||
using AK::is_any_of;
|
using AK::is_any_of;
|
||||||
using AK::is_path_separator;
|
using AK::is_path_separator;
|
||||||
using AK::is_quote;
|
using AK::is_quote;
|
||||||
|
using AK::LineTrackingLexer;
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -14,7 +14,7 @@
|
||||||
namespace JSSpecCompiler {
|
namespace JSSpecCompiler {
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
Optional<Token> consume_number(XML::LineTrackingLexer& lexer, Location& location)
|
Optional<Token> consume_number(LineTrackingLexer& lexer, Location& location)
|
||||||
{
|
{
|
||||||
u64 start = lexer.tell();
|
u64 start = lexer.tell();
|
||||||
|
|
||||||
|
@ -73,14 +73,14 @@ void tokenize_string(SpecificationParsingContext& ctx, XML::Node const* node, St
|
||||||
{ "+"sv, TokenType::Plus },
|
{ "+"sv, TokenType::Plus },
|
||||||
};
|
};
|
||||||
|
|
||||||
XML::LineTrackingLexer lexer(view, node->offset);
|
LineTrackingLexer lexer(view, node->offset);
|
||||||
|
|
||||||
while (!lexer.is_eof()) {
|
while (!lexer.is_eof()) {
|
||||||
lexer.ignore_while(is_ascii_space);
|
lexer.ignore_while(is_ascii_space);
|
||||||
|
|
||||||
// FIXME: This is incorrect since we count text offset after XML reference resolution. To do
|
// FIXME: This is incorrect since we count text offset after XML reference resolution. To do
|
||||||
// this properly, we need support from XML::Parser.
|
// this properly, we need support from XML::Parser.
|
||||||
Location token_location = ctx.location_from_xml_offset(lexer.offset_for(lexer.tell()));
|
Location token_location = ctx.location_from_xml_offset(lexer.position_for(lexer.tell()));
|
||||||
|
|
||||||
if (auto result = consume_number(lexer, token_location); result.has_value()) {
|
if (auto result = consume_number(lexer, token_location); result.has_value()) {
|
||||||
tokens.append(result.release_value());
|
tokens.append(result.release_value());
|
||||||
|
|
|
@ -50,12 +50,12 @@ Location SpecificationParsingContext::file_scope() const
|
||||||
return { .filename = m_translation_unit->filename() };
|
return { .filename = m_translation_unit->filename() };
|
||||||
}
|
}
|
||||||
|
|
||||||
Location SpecificationParsingContext::location_from_xml_offset(XML::Offset offset) const
|
Location SpecificationParsingContext::location_from_xml_offset(LineTrackingLexer::Position position) const
|
||||||
{
|
{
|
||||||
return {
|
return {
|
||||||
.filename = m_translation_unit->filename(),
|
.filename = m_translation_unit->filename(),
|
||||||
.line = offset.line,
|
.line = position.line,
|
||||||
.column = offset.column,
|
.column = position.column,
|
||||||
.logical_location = m_current_logical_scope,
|
.logical_location = m_current_logical_scope,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,7 +37,7 @@ public:
|
||||||
int step_list_nesting_level() const;
|
int step_list_nesting_level() const;
|
||||||
|
|
||||||
Location file_scope() const;
|
Location file_scope() const;
|
||||||
Location location_from_xml_offset(XML::Offset offset) const;
|
Location location_from_xml_offset(LineTrackingLexer::Position position) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
TranslationUnitRef m_translation_unit;
|
TranslationUnitRef m_translation_unit;
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <AK/ByteString.h>
|
#include <AK/ByteString.h>
|
||||||
|
#include <AK/GenericLexer.h>
|
||||||
#include <AK/HashMap.h>
|
#include <AK/HashMap.h>
|
||||||
#include <AK/Variant.h>
|
#include <AK/Variant.h>
|
||||||
#include <AK/Vector.h>
|
#include <AK/Vector.h>
|
||||||
|
@ -19,12 +20,6 @@ struct Attribute {
|
||||||
ByteString value;
|
ByteString value;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Offset {
|
|
||||||
size_t offset { 0 };
|
|
||||||
size_t line { 0 };
|
|
||||||
size_t column { 0 };
|
|
||||||
};
|
|
||||||
|
|
||||||
struct Node {
|
struct Node {
|
||||||
struct Text {
|
struct Text {
|
||||||
StringBuilder builder;
|
StringBuilder builder;
|
||||||
|
@ -40,7 +35,7 @@ struct Node {
|
||||||
|
|
||||||
bool operator==(Node const&) const;
|
bool operator==(Node const&) const;
|
||||||
|
|
||||||
Offset offset;
|
LineTrackingLexer::Position offset;
|
||||||
Variant<Text, Comment, Element> content;
|
Variant<Text, Comment, Element> content;
|
||||||
Node* parent { nullptr };
|
Node* parent { nullptr };
|
||||||
|
|
||||||
|
|
|
@ -66,31 +66,6 @@ consteval static auto set_to_search()
|
||||||
|
|
||||||
namespace XML {
|
namespace XML {
|
||||||
|
|
||||||
Offset LineTrackingLexer::offset_for(size_t index) const
|
|
||||||
{
|
|
||||||
auto& [cached_index, cached_line, cached_column] = m_cached_offset;
|
|
||||||
|
|
||||||
if (cached_index <= index) {
|
|
||||||
for (size_t i = cached_index; i < index; ++i) {
|
|
||||||
if (m_input[i] == '\n')
|
|
||||||
++cached_line, cached_column = 0;
|
|
||||||
else
|
|
||||||
++cached_column;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
auto lines_backtracked = m_input.substring_view(index, cached_index - index).count('\n');
|
|
||||||
cached_line -= lines_backtracked;
|
|
||||||
if (lines_backtracked == 0) {
|
|
||||||
cached_column -= cached_index - index;
|
|
||||||
} else {
|
|
||||||
auto current_line_start = m_input.substring_view(0, index).find_last('\n').value_or(0);
|
|
||||||
cached_column = index - current_line_start;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
cached_index = index;
|
|
||||||
return m_cached_offset;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t Parser::s_debug_indent_level { 0 };
|
size_t Parser::s_debug_indent_level { 0 };
|
||||||
|
|
||||||
void Parser::append_node(NonnullOwnPtr<Node> node)
|
void Parser::append_node(NonnullOwnPtr<Node> node)
|
||||||
|
@ -105,7 +80,7 @@ void Parser::append_node(NonnullOwnPtr<Node> node)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Parser::append_text(StringView text, Offset offset)
|
void Parser::append_text(StringView text, LineTrackingLexer::Position position)
|
||||||
{
|
{
|
||||||
if (m_listener) {
|
if (m_listener) {
|
||||||
m_listener->text(text);
|
m_listener->text(text);
|
||||||
|
@ -115,7 +90,7 @@ void Parser::append_text(StringView text, Offset offset)
|
||||||
if (!m_entered_node) {
|
if (!m_entered_node) {
|
||||||
Node::Text node;
|
Node::Text node;
|
||||||
node.builder.append(text);
|
node.builder.append(text);
|
||||||
m_root_node = make<Node>(offset, move(node));
|
m_root_node = make<Node>(position, move(node));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -130,7 +105,7 @@ void Parser::append_text(StringView text, Offset offset)
|
||||||
}
|
}
|
||||||
Node::Text text_node;
|
Node::Text text_node;
|
||||||
text_node.builder.append(text);
|
text_node.builder.append(text);
|
||||||
node.children.append(make<Node>(offset, move(text_node), m_entered_node));
|
node.children.append(make<Node>(position, move(text_node), m_entered_node));
|
||||||
},
|
},
|
||||||
[&](auto&) {
|
[&](auto&) {
|
||||||
// Can't enter a text or comment node.
|
// Can't enter a text or comment node.
|
||||||
|
@ -138,7 +113,7 @@ void Parser::append_text(StringView text, Offset offset)
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void Parser::append_comment(StringView text, Offset offset)
|
void Parser::append_comment(StringView text, LineTrackingLexer::Position position)
|
||||||
{
|
{
|
||||||
if (m_listener) {
|
if (m_listener) {
|
||||||
m_listener->comment(text);
|
m_listener->comment(text);
|
||||||
|
@ -152,7 +127,7 @@ void Parser::append_comment(StringView text, Offset offset)
|
||||||
|
|
||||||
m_entered_node->content.visit(
|
m_entered_node->content.visit(
|
||||||
[&](Node::Element& node) {
|
[&](Node::Element& node) {
|
||||||
node.children.append(make<Node>(offset, Node::Comment { text }, m_entered_node));
|
node.children.append(make<Node>(position, Node::Comment { text }, m_entered_node));
|
||||||
},
|
},
|
||||||
[&](auto&) {
|
[&](auto&) {
|
||||||
// Can't enter a text or comment node.
|
// Can't enter a text or comment node.
|
||||||
|
@ -507,7 +482,7 @@ ErrorOr<void, ParseError> Parser::parse_comment()
|
||||||
TRY(expect("-->"sv));
|
TRY(expect("-->"sv));
|
||||||
|
|
||||||
if (m_options.preserve_comments)
|
if (m_options.preserve_comments)
|
||||||
append_comment(text, m_lexer.offset_for(comment_start));
|
append_comment(text, m_lexer.position_for(comment_start));
|
||||||
|
|
||||||
rollback.disarm();
|
rollback.disarm();
|
||||||
return {};
|
return {};
|
||||||
|
@ -699,7 +674,7 @@ ErrorOr<NonnullOwnPtr<Node>, ParseError> Parser::parse_empty_element_tag()
|
||||||
TRY(expect("/>"sv));
|
TRY(expect("/>"sv));
|
||||||
|
|
||||||
rollback.disarm();
|
rollback.disarm();
|
||||||
return make<Node>(m_lexer.offset_for(tag_start), Node::Element { move(name), move(attributes), {} });
|
return make<Node>(m_lexer.position_for(tag_start), Node::Element { move(name), move(attributes), {} });
|
||||||
}
|
}
|
||||||
|
|
||||||
// 3.1.41. Attribute, https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-Attribute
|
// 3.1.41. Attribute, https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-Attribute
|
||||||
|
@ -851,7 +826,7 @@ ErrorOr<NonnullOwnPtr<Node>, ParseError> Parser::parse_start_tag()
|
||||||
TRY(expect(">"sv));
|
TRY(expect(">"sv));
|
||||||
|
|
||||||
rollback.disarm();
|
rollback.disarm();
|
||||||
return make<Node>(m_lexer.offset_for(tag_start), Node::Element { move(name), move(attributes), {} });
|
return make<Node>(m_lexer.position_for(tag_start), Node::Element { move(name), move(attributes), {} });
|
||||||
}
|
}
|
||||||
|
|
||||||
// 3.1.42 ETag, https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-ETag
|
// 3.1.42 ETag, https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-ETag
|
||||||
|
@ -881,7 +856,7 @@ ErrorOr<void, ParseError> Parser::parse_content()
|
||||||
// content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
|
// content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)*
|
||||||
auto content_start = m_lexer.tell();
|
auto content_start = m_lexer.tell();
|
||||||
if (auto result = parse_char_data(); !result.is_error())
|
if (auto result = parse_char_data(); !result.is_error())
|
||||||
append_text(result.release_value(), m_lexer.offset_for(content_start));
|
append_text(result.release_value(), m_lexer.position_for(content_start));
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
auto node_start = m_lexer.tell();
|
auto node_start = m_lexer.tell();
|
||||||
|
@ -890,7 +865,7 @@ ErrorOr<void, ParseError> Parser::parse_content()
|
||||||
goto try_char_data;
|
goto try_char_data;
|
||||||
if (auto result = parse_reference(); !result.is_error()) {
|
if (auto result = parse_reference(); !result.is_error()) {
|
||||||
auto reference = result.release_value();
|
auto reference = result.release_value();
|
||||||
auto reference_offset = m_lexer.offset_for(node_start);
|
auto reference_offset = m_lexer.position_for(node_start);
|
||||||
if (auto char_reference = reference.get_pointer<ByteString>())
|
if (auto char_reference = reference.get_pointer<ByteString>())
|
||||||
append_text(*char_reference, reference_offset);
|
append_text(*char_reference, reference_offset);
|
||||||
else
|
else
|
||||||
|
@ -899,7 +874,7 @@ ErrorOr<void, ParseError> Parser::parse_content()
|
||||||
}
|
}
|
||||||
if (auto result = parse_cdata_section(); !result.is_error()) {
|
if (auto result = parse_cdata_section(); !result.is_error()) {
|
||||||
if (m_options.preserve_cdata)
|
if (m_options.preserve_cdata)
|
||||||
append_text(result.release_value(), m_lexer.offset_for(node_start));
|
append_text(result.release_value(), m_lexer.position_for(node_start));
|
||||||
goto try_char_data;
|
goto try_char_data;
|
||||||
}
|
}
|
||||||
if (auto result = parse_processing_instruction(); !result.is_error())
|
if (auto result = parse_processing_instruction(); !result.is_error())
|
||||||
|
@ -911,7 +886,7 @@ ErrorOr<void, ParseError> Parser::parse_content()
|
||||||
|
|
||||||
try_char_data:;
|
try_char_data:;
|
||||||
if (auto result = parse_char_data(); !result.is_error())
|
if (auto result = parse_char_data(); !result.is_error())
|
||||||
append_text(result.release_value(), m_lexer.offset_for(node_start));
|
append_text(result.release_value(), m_lexer.position_for(node_start));
|
||||||
}
|
}
|
||||||
|
|
||||||
rollback.disarm();
|
rollback.disarm();
|
||||||
|
|
|
@ -39,29 +39,6 @@ struct Listener {
|
||||||
virtual void error(ParseError const&) { }
|
virtual void error(ParseError const&) { }
|
||||||
};
|
};
|
||||||
|
|
||||||
// FIXME: This is also used in JSSpecCompiler, so should probably live in AK or even merged with
|
|
||||||
// AK::GenericLexer.
|
|
||||||
class LineTrackingLexer : public GenericLexer {
|
|
||||||
public:
|
|
||||||
using GenericLexer::GenericLexer;
|
|
||||||
|
|
||||||
LineTrackingLexer(StringView input, XML::Offset start_offset)
|
|
||||||
: GenericLexer(input)
|
|
||||||
, m_cached_offset {
|
|
||||||
.line = start_offset.line,
|
|
||||||
.column = start_offset.column,
|
|
||||||
}
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
Offset cached_offset() const { return m_cached_offset; }
|
|
||||||
void restore_cached_offset(Offset cached_offset) { m_cached_offset = cached_offset; }
|
|
||||||
Offset offset_for(size_t) const;
|
|
||||||
|
|
||||||
protected:
|
|
||||||
mutable Offset m_cached_offset;
|
|
||||||
};
|
|
||||||
|
|
||||||
class Parser {
|
class Parser {
|
||||||
public:
|
public:
|
||||||
struct Options {
|
struct Options {
|
||||||
|
@ -96,8 +73,8 @@ private:
|
||||||
|
|
||||||
ErrorOr<void, ParseError> parse_internal();
|
ErrorOr<void, ParseError> parse_internal();
|
||||||
void append_node(NonnullOwnPtr<Node>);
|
void append_node(NonnullOwnPtr<Node>);
|
||||||
void append_text(StringView, Offset);
|
void append_text(StringView, LineTrackingLexer::Position);
|
||||||
void append_comment(StringView, Offset);
|
void append_comment(StringView, LineTrackingLexer::Position);
|
||||||
void enter_node(Node&);
|
void enter_node(Node&);
|
||||||
void leave_node();
|
void leave_node();
|
||||||
|
|
||||||
|
@ -170,9 +147,9 @@ private:
|
||||||
[[nodiscard]] auto rollback_point(SourceLocation location = SourceLocation::current())
|
[[nodiscard]] auto rollback_point(SourceLocation location = SourceLocation::current())
|
||||||
{
|
{
|
||||||
return ArmedScopeGuard {
|
return ArmedScopeGuard {
|
||||||
[this, position = m_lexer.tell(), cached_offset = m_lexer.cached_offset(), location] {
|
[this, position = m_lexer.tell(), cached_position = m_lexer.cached_position(), location] {
|
||||||
m_lexer.retreat(m_lexer.tell() - position);
|
m_lexer.retreat(m_lexer.tell() - position);
|
||||||
m_lexer.restore_cached_offset(cached_offset);
|
m_lexer.restore_cached_offset(cached_position);
|
||||||
(void)location;
|
(void)location;
|
||||||
dbgln_if(XML_PARSER_DEBUG, "{:->{}}FAIL @ {} -- \x1b[31m{}\x1b[0m", " ", s_debug_indent_level * 2, location, m_lexer.remaining().substring_view(0, min(16, m_lexer.tell_remaining())).replace("\n"sv, "\\n"sv, ReplaceMode::All));
|
dbgln_if(XML_PARSER_DEBUG, "{:->{}}FAIL @ {} -- \x1b[31m{}\x1b[0m", " ", s_debug_indent_level * 2, location, m_lexer.remaining().substring_view(0, min(16, m_lexer.tell_remaining())).replace("\n"sv, "\\n"sv, ReplaceMode::All));
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue