mirror of
https://github.com/RGBCube/serenity
synced 2025-05-20 07:05:08 +00:00
JSSpecCompiler: Get rid of ParseError in Lexer
This commit is contained in:
parent
ed04aff1de
commit
9a2337f7ad
5 changed files with 125 additions and 88 deletions
|
@ -43,9 +43,8 @@ bool can_end_word_token(char c)
|
|||
{
|
||||
return is_ascii_space(c) || ".,"sv.contains(c);
|
||||
}
|
||||
}
|
||||
|
||||
ParseErrorOr<void> tokenize_string(SpecificationParsingContext& ctx, XML::Node const* node, StringView view, Vector<Token>& tokens)
|
||||
void tokenize_string(SpecificationParsingContext& ctx, XML::Node const* node, StringView view, Vector<Token>& tokens)
|
||||
{
|
||||
static constexpr struct {
|
||||
StringView text_to_match;
|
||||
|
@ -103,74 +102,131 @@ ParseErrorOr<void> tokenize_string(SpecificationParsingContext& ctx, XML::Node c
|
|||
if (word.length())
|
||||
tokens.append({ TokenType::Word, word, node, move(token_location) });
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
ParseErrorOr<TokenizeTreeResult> tokenize_tree(SpecificationParsingContext& ctx, XML::Node const* node, bool allow_substeps)
|
||||
enum class TreeType {
|
||||
AlgorithmStep,
|
||||
Header,
|
||||
};
|
||||
|
||||
struct TokenizerState {
|
||||
Vector<Token> tokens;
|
||||
XML::Node const* substeps = nullptr;
|
||||
bool has_errors = false;
|
||||
};
|
||||
|
||||
void tokenize_tree(SpecificationParsingContext& ctx, TokenizerState& state, XML::Node const* node, TreeType tree_type)
|
||||
{
|
||||
TokenizeTreeResult result;
|
||||
auto& tokens = result.tokens;
|
||||
// FIXME: Use structured binding once macOS Lagom CI updates to Clang >= 16.
|
||||
auto& tokens = state.tokens;
|
||||
auto& substeps = state.substeps;
|
||||
auto& has_errors = state.has_errors;
|
||||
|
||||
for (auto const& child : node->as_element().children) {
|
||||
TRY(child->content.visit(
|
||||
[&](XML::Node::Element const& element) -> ParseErrorOr<void> {
|
||||
if (result.substeps != nullptr)
|
||||
return ParseError::create("Substeps list must be the last non-empty child"sv, child);
|
||||
if (has_errors)
|
||||
break;
|
||||
|
||||
child->content.visit(
|
||||
[&](XML::Node::Element const& element) -> void {
|
||||
Location child_location = ctx.location_from_xml_offset(child->offset);
|
||||
auto report_error = [&]<typename... Parameters>(AK::CheckedFormatString<Parameters...>&& fmt, Parameters const&... parameters) {
|
||||
ctx.diag().error(child_location, move(fmt), parameters...);
|
||||
has_errors = true;
|
||||
};
|
||||
|
||||
if (element.name == tag_var) {
|
||||
tokens.append({ TokenType::Identifier, TRY(get_text_contents(child)), child, move(child_location) });
|
||||
return {};
|
||||
if (substeps) {
|
||||
report_error("substeps list must be the last child of algorithm step");
|
||||
return;
|
||||
}
|
||||
|
||||
if (element.name == tag_span) {
|
||||
auto element_class = TRY(deprecated_get_attribute_by_name(child, attribute_class));
|
||||
if (element_class != class_secnum)
|
||||
return ParseError::create(String::formatted("Expected 'secnum' as a class name of <span>, but found '{}'", element_class), child);
|
||||
tokens.append({ TokenType::SectionNumber, TRY(get_text_contents(child)), child, move(child_location) });
|
||||
return {};
|
||||
if (element.name == tag_var) {
|
||||
auto variable_name = get_text_contents(child);
|
||||
if (!variable_name.has_value())
|
||||
report_error("malformed <var> subtree, expected single text child node");
|
||||
|
||||
tokens.append({ TokenType::Identifier, variable_name.value_or(""sv), child, move(child_location) });
|
||||
return;
|
||||
}
|
||||
|
||||
if (element.name == tag_emu_val) {
|
||||
auto contents = TRY(get_text_contents(child));
|
||||
auto maybe_contents = get_text_contents(child);
|
||||
if (!maybe_contents.has_value())
|
||||
report_error("malformed <emu-val> subtree, expected single text child node");
|
||||
|
||||
auto contents = maybe_contents.value_or(""sv);
|
||||
|
||||
if (contents.length() >= 2 && contents.starts_with('"') && contents.ends_with('"'))
|
||||
tokens.append({ TokenType::String, contents.substring_view(1, contents.length() - 2), child, move(child_location) });
|
||||
else if (contents == "undefined")
|
||||
tokens.append({ TokenType::Undefined, contents, child, move(child_location) });
|
||||
else
|
||||
tokens.append({ TokenType::Identifier, contents, child, move(child_location) });
|
||||
return {};
|
||||
return;
|
||||
}
|
||||
|
||||
if (element.name == tag_emu_xref) {
|
||||
auto contents = TRY(get_text_contents(TRY(get_only_child(child, "a"sv))));
|
||||
tokens.append({ TokenType::Identifier, contents, child, move(child_location) });
|
||||
return {};
|
||||
auto identifier = get_single_child_with_tag(child, "a"sv).map([](XML::Node const* node) {
|
||||
return get_text_contents(node).value_or(""sv);
|
||||
});
|
||||
if (!identifier.has_value() || identifier.value().is_empty())
|
||||
report_error("malformed <emu-xref> subtree, expected <a> with nested single text node");
|
||||
|
||||
tokens.append({ TokenType::Identifier, identifier.value_or(""sv), child, move(child_location) });
|
||||
return;
|
||||
}
|
||||
|
||||
if (element.name == tag_ol) {
|
||||
if (!allow_substeps)
|
||||
return ParseError::create("Found nested list but substeps are not allowed"sv, child);
|
||||
result.substeps = child;
|
||||
return {};
|
||||
if (tree_type == TreeType::Header && element.name == tag_span) {
|
||||
auto element_class = get_attribute_by_name(child, attribute_class);
|
||||
if (element_class != class_secnum)
|
||||
report_error("expected <span> to have class='secnum' attribute");
|
||||
|
||||
auto section_number = get_text_contents(child);
|
||||
if (!section_number.has_value())
|
||||
report_error("malformed section number span subtree, expected single text child node");
|
||||
|
||||
tokens.append({ TokenType::SectionNumber, section_number.value_or(""sv), child, move(child_location) });
|
||||
return;
|
||||
}
|
||||
|
||||
return ParseError::create(String::formatted("Unexpected child element with tag {}", element.name), child);
|
||||
if (tree_type == TreeType::AlgorithmStep && element.name == tag_ol) {
|
||||
substeps = child;
|
||||
return;
|
||||
}
|
||||
|
||||
report_error("<{}> should not be a child of algorithm step", element.name);
|
||||
},
|
||||
[&](XML::Node::Text const& text) -> ParseErrorOr<void> {
|
||||
[&](XML::Node::Text const& text) {
|
||||
auto view = text.builder.string_view();
|
||||
if (result.substeps && !contains_empty_text(child))
|
||||
return ParseError::create("Substeps list must be the last non-empty child"sv, child);
|
||||
return tokenize_string(ctx, child, view, tokens);
|
||||
if (substeps != nullptr && !contains_empty_text(child)) {
|
||||
ctx.diag().error(ctx.location_from_xml_offset(child->offset),
|
||||
"substeps list must be the last child of algorithm step");
|
||||
} else {
|
||||
tokenize_string(ctx, child, view, tokens);
|
||||
}
|
||||
},
|
||||
move(ignore_comments)));
|
||||
[&](auto const&) {});
|
||||
}
|
||||
|
||||
if (tokens.size() && tokens.last().type == TokenType::MemberAccess)
|
||||
tokens.last().type = TokenType::Dot;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
StepTokenizationResult tokenize_step(SpecificationParsingContext& ctx, XML::Node const* node)
|
||||
{
|
||||
TokenizerState state;
|
||||
tokenize_tree(ctx, state, node, TreeType::AlgorithmStep);
|
||||
return {
|
||||
.tokens = state.has_errors ? OptionalNone {} : Optional<Vector<Token>> { move(state.tokens) },
|
||||
.substeps = state.substeps,
|
||||
};
|
||||
}
|
||||
|
||||
Optional<Vector<Token>> tokenize_header(SpecificationParsingContext& ctx, XML::Node const* node)
|
||||
{
|
||||
TokenizerState state;
|
||||
tokenize_tree(ctx, state, node, TreeType::Header);
|
||||
return state.has_errors ? OptionalNone {} : Optional<Vector<Token>> { state.tokens };
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -31,13 +31,12 @@ inline constexpr StringView attribute_id = "id"sv;
|
|||
|
||||
inline constexpr StringView class_secnum = "secnum"sv;
|
||||
|
||||
ParseErrorOr<void> tokenize_string(SpecificationParsingContext& ctx, XML::Node const* node, StringView view, Vector<Token>& tokens);
|
||||
|
||||
struct TokenizeTreeResult {
|
||||
Vector<Token> tokens;
|
||||
struct StepTokenizationResult {
|
||||
Optional<Vector<Token>> tokens;
|
||||
XML::Node const* substeps = nullptr;
|
||||
};
|
||||
|
||||
ParseErrorOr<TokenizeTreeResult> tokenize_tree(SpecificationParsingContext& ctx, XML::Node const* node, bool allow_substeps = false);
|
||||
StepTokenizationResult tokenize_step(SpecificationParsingContext& ctx, XML::Node const* node);
|
||||
Optional<Vector<Token>> tokenize_header(SpecificationParsingContext& ctx, XML::Node const* node);
|
||||
|
||||
}
|
||||
|
|
|
@ -64,16 +64,9 @@ Optional<AlgorithmStep> AlgorithmStep::create(SpecificationParsingContext& ctx,
|
|||
{
|
||||
VERIFY(element->as_element().name == tag_li);
|
||||
|
||||
auto tokenization_result = tokenize_tree(ctx, element, true);
|
||||
if (tokenization_result.is_error()) {
|
||||
ctx.diag().error(ctx.location_from_xml_offset(tokenization_result.error()->offset()),
|
||||
"{}", tokenization_result.error()->to_string());
|
||||
return {};
|
||||
}
|
||||
auto [maybe_tokens, substeps] = tokenize_step(ctx, element);
|
||||
|
||||
auto [tokens, substeps] = tokenization_result.release_value();
|
||||
AlgorithmStep result(ctx);
|
||||
result.m_tokens = move(tokens);
|
||||
result.m_node = element;
|
||||
|
||||
if (substeps) {
|
||||
|
@ -86,6 +79,10 @@ Optional<AlgorithmStep> AlgorithmStep::create(SpecificationParsingContext& ctx,
|
|||
result.m_substeps = step_list.has_value() ? step_list->tree() : error_tree;
|
||||
}
|
||||
|
||||
if (!maybe_tokens.has_value())
|
||||
return {};
|
||||
result.m_tokens = maybe_tokens.release_value();
|
||||
|
||||
if (!result.parse())
|
||||
return {};
|
||||
return result;
|
||||
|
@ -260,14 +257,11 @@ Optional<FailedTextParseDiagnostic> SpecificationClause::parse_header(XML::Node
|
|||
auto& ctx = *m_ctx_pointer;
|
||||
VERIFY(element->as_element().name == tag_h1);
|
||||
|
||||
auto tokenization_result = tokenize_tree(ctx, element, false);
|
||||
if (tokenization_result.is_error()) {
|
||||
return FailedTextParseDiagnostic {
|
||||
ctx.location_from_xml_offset(tokenization_result.error()->offset()),
|
||||
tokenization_result.error()->to_string()
|
||||
};
|
||||
}
|
||||
auto const& tokens = tokenization_result.release_value().tokens;
|
||||
auto maybe_tokens = tokenize_header(ctx, element);
|
||||
if (!maybe_tokens.has_value())
|
||||
return {};
|
||||
|
||||
auto const& tokens = maybe_tokens.release_value();
|
||||
|
||||
TextParser parser(ctx, tokens, element);
|
||||
auto parse_result = parser.parse_clause_header();
|
||||
|
@ -289,6 +283,7 @@ void SpecificationClause::parse(XML::Node const* element)
|
|||
auto& ctx = context();
|
||||
u32 child_index = 0;
|
||||
|
||||
bool node_ignored_warning_issued = false;
|
||||
Optional<FailedTextParseDiagnostic> header_parse_error;
|
||||
|
||||
for (auto const& child : element->as_element().children) {
|
||||
|
@ -312,9 +307,11 @@ void SpecificationClause::parse(XML::Node const* element)
|
|||
m_subclauses.append(create(ctx, child));
|
||||
return;
|
||||
}
|
||||
if (header_parse_error.has_value()) {
|
||||
if (!node_ignored_warning_issued && m_header.header.has<AK::Empty>()) {
|
||||
node_ignored_warning_issued = true;
|
||||
ctx.diag().warn(ctx.location_from_xml_offset(child->offset),
|
||||
"node content will be ignored since section header was not parsed successfully");
|
||||
if (header_parse_error.has_value())
|
||||
ctx.diag().note(header_parse_error->location, "{}", header_parse_error->message);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,15 +16,6 @@ bool contains_empty_text(XML::Node const* node)
|
|||
return node->as_text().builder.string_view().trim_whitespace().is_empty();
|
||||
}
|
||||
|
||||
ParseErrorOr<StringView> deprecated_get_attribute_by_name(XML::Node const* node, StringView attribute_name)
|
||||
{
|
||||
auto const& attribute = node->as_element().attributes.get(attribute_name);
|
||||
|
||||
if (!attribute.has_value())
|
||||
return ParseError::create(String::formatted("Attribute {} is not present", attribute_name), node);
|
||||
return attribute.value();
|
||||
}
|
||||
|
||||
Optional<StringView> get_attribute_by_name(XML::Node const* node, StringView attribute_name)
|
||||
{
|
||||
auto const& attribute = node->as_element().attributes.get(attribute_name);
|
||||
|
@ -34,39 +25,34 @@ Optional<StringView> get_attribute_by_name(XML::Node const* node, StringView att
|
|||
return attribute.value();
|
||||
}
|
||||
|
||||
ParseErrorOr<StringView> get_text_contents(XML::Node const* node)
|
||||
Optional<StringView> get_text_contents(XML::Node const* node)
|
||||
{
|
||||
auto const& children = node->as_element().children;
|
||||
|
||||
if (children.size() != 1 || !children[0]->is_text())
|
||||
return ParseError::create("Expected single text node in a child list of the node"sv, node);
|
||||
return {};
|
||||
return children[0]->as_text().builder.string_view();
|
||||
}
|
||||
|
||||
ParseErrorOr<XML::Node const*> get_only_child(XML::Node const* element, StringView tag_name)
|
||||
Optional<XML::Node const*> get_single_child_with_tag(XML::Node const* element, StringView tag_name)
|
||||
{
|
||||
XML::Node const* result = nullptr;
|
||||
|
||||
for (auto const& child : element->as_element().children) {
|
||||
TRY(child->content.visit(
|
||||
[&](XML::Node::Element const& element) -> ParseErrorOr<void> {
|
||||
if (element.name != tag_name)
|
||||
return ParseError::create(String::formatted("Expected child with the tag name {} but found {}", tag_name, element.name), child);
|
||||
if (result != nullptr)
|
||||
return ParseError::create("Element must have only one child"sv, child);
|
||||
auto is_valid = child->content.visit(
|
||||
[&](XML::Node::Element const& element) {
|
||||
result = child;
|
||||
return {};
|
||||
return result != nullptr || element.name != tag_name;
|
||||
},
|
||||
[&](XML::Node::Text const&) -> ParseErrorOr<void> {
|
||||
if (!contains_empty_text(child))
|
||||
return ParseError::create("Element should not have non-empty child text nodes"sv, element);
|
||||
return {};
|
||||
[&](XML::Node::Text const&) {
|
||||
return contains_empty_text(child);
|
||||
},
|
||||
move(ignore_comments)));
|
||||
[&](auto const&) { return true; });
|
||||
if (!is_valid)
|
||||
return {};
|
||||
}
|
||||
|
||||
if (result == nullptr)
|
||||
return ParseError::create(String::formatted("Element must have only one child"), element);
|
||||
return {};
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
@ -20,11 +20,10 @@ inline constexpr IgnoreComments ignore_comments {};
|
|||
|
||||
bool contains_empty_text(XML::Node const* node);
|
||||
|
||||
ParseErrorOr<StringView> deprecated_get_attribute_by_name(XML::Node const* node, StringView attribute_name);
|
||||
Optional<StringView> get_attribute_by_name(XML::Node const* node, StringView attribute_name);
|
||||
|
||||
ParseErrorOr<StringView> get_text_contents(XML::Node const* node);
|
||||
Optional<StringView> get_text_contents(XML::Node const* node);
|
||||
|
||||
ParseErrorOr<XML::Node const*> get_only_child(XML::Node const* element, StringView tag_name);
|
||||
Optional<XML::Node const*> get_single_child_with_tag(XML::Node const* element, StringView tag_name);
|
||||
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue