1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-20 16:55:08 +00:00

JSSpecCompiler: Get rid of ParseError in Lexer

This commit is contained in:
Dan Klishch 2024-01-20 19:47:02 -05:00 committed by Andrew Kaster
parent ed04aff1de
commit 9a2337f7ad
5 changed files with 125 additions and 88 deletions

View file

@ -43,9 +43,8 @@ bool can_end_word_token(char c)
{ {
return is_ascii_space(c) || ".,"sv.contains(c); return is_ascii_space(c) || ".,"sv.contains(c);
} }
}
ParseErrorOr<void> tokenize_string(SpecificationParsingContext& ctx, XML::Node const* node, StringView view, Vector<Token>& tokens) void tokenize_string(SpecificationParsingContext& ctx, XML::Node const* node, StringView view, Vector<Token>& tokens)
{ {
static constexpr struct { static constexpr struct {
StringView text_to_match; StringView text_to_match;
@ -103,74 +102,131 @@ ParseErrorOr<void> tokenize_string(SpecificationParsingContext& ctx, XML::Node c
if (word.length()) if (word.length())
tokens.append({ TokenType::Word, word, node, move(token_location) }); tokens.append({ TokenType::Word, word, node, move(token_location) });
} }
return {};
} }
ParseErrorOr<TokenizeTreeResult> tokenize_tree(SpecificationParsingContext& ctx, XML::Node const* node, bool allow_substeps) enum class TreeType {
AlgorithmStep,
Header,
};
struct TokenizerState {
Vector<Token> tokens;
XML::Node const* substeps = nullptr;
bool has_errors = false;
};
void tokenize_tree(SpecificationParsingContext& ctx, TokenizerState& state, XML::Node const* node, TreeType tree_type)
{ {
TokenizeTreeResult result; // FIXME: Use structured binding once macOS Lagom CI updates to Clang >= 16.
auto& tokens = result.tokens; auto& tokens = state.tokens;
auto& substeps = state.substeps;
auto& has_errors = state.has_errors;
for (auto const& child : node->as_element().children) { for (auto const& child : node->as_element().children) {
TRY(child->content.visit( if (has_errors)
[&](XML::Node::Element const& element) -> ParseErrorOr<void> { break;
if (result.substeps != nullptr)
return ParseError::create("Substeps list must be the last non-empty child"sv, child);
child->content.visit(
[&](XML::Node::Element const& element) -> void {
Location child_location = ctx.location_from_xml_offset(child->offset); Location child_location = ctx.location_from_xml_offset(child->offset);
auto report_error = [&]<typename... Parameters>(AK::CheckedFormatString<Parameters...>&& fmt, Parameters const&... parameters) {
ctx.diag().error(child_location, move(fmt), parameters...);
has_errors = true;
};
if (element.name == tag_var) { if (substeps) {
tokens.append({ TokenType::Identifier, TRY(get_text_contents(child)), child, move(child_location) }); report_error("substeps list must be the last child of algorithm step");
return {}; return;
} }
if (element.name == tag_span) { if (element.name == tag_var) {
auto element_class = TRY(deprecated_get_attribute_by_name(child, attribute_class)); auto variable_name = get_text_contents(child);
if (element_class != class_secnum) if (!variable_name.has_value())
return ParseError::create(String::formatted("Expected 'secnum' as a class name of <span>, but found '{}'", element_class), child); report_error("malformed <var> subtree, expected single text child node");
tokens.append({ TokenType::SectionNumber, TRY(get_text_contents(child)), child, move(child_location) });
return {}; tokens.append({ TokenType::Identifier, variable_name.value_or(""sv), child, move(child_location) });
return;
} }
if (element.name == tag_emu_val) { if (element.name == tag_emu_val) {
auto contents = TRY(get_text_contents(child)); auto maybe_contents = get_text_contents(child);
if (!maybe_contents.has_value())
report_error("malformed <emu-val> subtree, expected single text child node");
auto contents = maybe_contents.value_or(""sv);
if (contents.length() >= 2 && contents.starts_with('"') && contents.ends_with('"')) if (contents.length() >= 2 && contents.starts_with('"') && contents.ends_with('"'))
tokens.append({ TokenType::String, contents.substring_view(1, contents.length() - 2), child, move(child_location) }); tokens.append({ TokenType::String, contents.substring_view(1, contents.length() - 2), child, move(child_location) });
else if (contents == "undefined") else if (contents == "undefined")
tokens.append({ TokenType::Undefined, contents, child, move(child_location) }); tokens.append({ TokenType::Undefined, contents, child, move(child_location) });
else else
tokens.append({ TokenType::Identifier, contents, child, move(child_location) }); tokens.append({ TokenType::Identifier, contents, child, move(child_location) });
return {}; return;
} }
if (element.name == tag_emu_xref) { if (element.name == tag_emu_xref) {
auto contents = TRY(get_text_contents(TRY(get_only_child(child, "a"sv)))); auto identifier = get_single_child_with_tag(child, "a"sv).map([](XML::Node const* node) {
tokens.append({ TokenType::Identifier, contents, child, move(child_location) }); return get_text_contents(node).value_or(""sv);
return {}; });
if (!identifier.has_value() || identifier.value().is_empty())
report_error("malformed <emu-xref> subtree, expected <a> with nested single text node");
tokens.append({ TokenType::Identifier, identifier.value_or(""sv), child, move(child_location) });
return;
} }
if (element.name == tag_ol) { if (tree_type == TreeType::Header && element.name == tag_span) {
if (!allow_substeps) auto element_class = get_attribute_by_name(child, attribute_class);
return ParseError::create("Found nested list but substeps are not allowed"sv, child); if (element_class != class_secnum)
result.substeps = child; report_error("expected <span> to have class='secnum' attribute");
return {};
auto section_number = get_text_contents(child);
if (!section_number.has_value())
report_error("malformed section number span subtree, expected single text child node");
tokens.append({ TokenType::SectionNumber, section_number.value_or(""sv), child, move(child_location) });
return;
} }
return ParseError::create(String::formatted("Unexpected child element with tag {}", element.name), child); if (tree_type == TreeType::AlgorithmStep && element.name == tag_ol) {
substeps = child;
return;
}
report_error("<{}> should not be a child of algorithm step", element.name);
}, },
[&](XML::Node::Text const& text) -> ParseErrorOr<void> { [&](XML::Node::Text const& text) {
auto view = text.builder.string_view(); auto view = text.builder.string_view();
if (result.substeps && !contains_empty_text(child)) if (substeps != nullptr && !contains_empty_text(child)) {
return ParseError::create("Substeps list must be the last non-empty child"sv, child); ctx.diag().error(ctx.location_from_xml_offset(child->offset),
return tokenize_string(ctx, child, view, tokens); "substeps list must be the last child of algorithm step");
} else {
tokenize_string(ctx, child, view, tokens);
}
}, },
move(ignore_comments))); [&](auto const&) {});
} }
if (tokens.size() && tokens.last().type == TokenType::MemberAccess) if (tokens.size() && tokens.last().type == TokenType::MemberAccess)
tokens.last().type = TokenType::Dot; tokens.last().type = TokenType::Dot;
}
}
return result; StepTokenizationResult tokenize_step(SpecificationParsingContext& ctx, XML::Node const* node)
{
TokenizerState state;
tokenize_tree(ctx, state, node, TreeType::AlgorithmStep);
return {
.tokens = state.has_errors ? OptionalNone {} : Optional<Vector<Token>> { move(state.tokens) },
.substeps = state.substeps,
};
}
Optional<Vector<Token>> tokenize_header(SpecificationParsingContext& ctx, XML::Node const* node)
{
TokenizerState state;
tokenize_tree(ctx, state, node, TreeType::Header);
return state.has_errors ? OptionalNone {} : Optional<Vector<Token>> { state.tokens };
} }
} }

View file

@ -31,13 +31,12 @@ inline constexpr StringView attribute_id = "id"sv;
inline constexpr StringView class_secnum = "secnum"sv; inline constexpr StringView class_secnum = "secnum"sv;
ParseErrorOr<void> tokenize_string(SpecificationParsingContext& ctx, XML::Node const* node, StringView view, Vector<Token>& tokens); struct StepTokenizationResult {
Optional<Vector<Token>> tokens;
struct TokenizeTreeResult {
Vector<Token> tokens;
XML::Node const* substeps = nullptr; XML::Node const* substeps = nullptr;
}; };
ParseErrorOr<TokenizeTreeResult> tokenize_tree(SpecificationParsingContext& ctx, XML::Node const* node, bool allow_substeps = false); StepTokenizationResult tokenize_step(SpecificationParsingContext& ctx, XML::Node const* node);
Optional<Vector<Token>> tokenize_header(SpecificationParsingContext& ctx, XML::Node const* node);
} }

View file

@ -64,16 +64,9 @@ Optional<AlgorithmStep> AlgorithmStep::create(SpecificationParsingContext& ctx,
{ {
VERIFY(element->as_element().name == tag_li); VERIFY(element->as_element().name == tag_li);
auto tokenization_result = tokenize_tree(ctx, element, true); auto [maybe_tokens, substeps] = tokenize_step(ctx, element);
if (tokenization_result.is_error()) {
ctx.diag().error(ctx.location_from_xml_offset(tokenization_result.error()->offset()),
"{}", tokenization_result.error()->to_string());
return {};
}
auto [tokens, substeps] = tokenization_result.release_value();
AlgorithmStep result(ctx); AlgorithmStep result(ctx);
result.m_tokens = move(tokens);
result.m_node = element; result.m_node = element;
if (substeps) { if (substeps) {
@ -86,6 +79,10 @@ Optional<AlgorithmStep> AlgorithmStep::create(SpecificationParsingContext& ctx,
result.m_substeps = step_list.has_value() ? step_list->tree() : error_tree; result.m_substeps = step_list.has_value() ? step_list->tree() : error_tree;
} }
if (!maybe_tokens.has_value())
return {};
result.m_tokens = maybe_tokens.release_value();
if (!result.parse()) if (!result.parse())
return {}; return {};
return result; return result;
@ -260,14 +257,11 @@ Optional<FailedTextParseDiagnostic> SpecificationClause::parse_header(XML::Node
auto& ctx = *m_ctx_pointer; auto& ctx = *m_ctx_pointer;
VERIFY(element->as_element().name == tag_h1); VERIFY(element->as_element().name == tag_h1);
auto tokenization_result = tokenize_tree(ctx, element, false); auto maybe_tokens = tokenize_header(ctx, element);
if (tokenization_result.is_error()) { if (!maybe_tokens.has_value())
return FailedTextParseDiagnostic { return {};
ctx.location_from_xml_offset(tokenization_result.error()->offset()),
tokenization_result.error()->to_string() auto const& tokens = maybe_tokens.release_value();
};
}
auto const& tokens = tokenization_result.release_value().tokens;
TextParser parser(ctx, tokens, element); TextParser parser(ctx, tokens, element);
auto parse_result = parser.parse_clause_header(); auto parse_result = parser.parse_clause_header();
@ -289,6 +283,7 @@ void SpecificationClause::parse(XML::Node const* element)
auto& ctx = context(); auto& ctx = context();
u32 child_index = 0; u32 child_index = 0;
bool node_ignored_warning_issued = false;
Optional<FailedTextParseDiagnostic> header_parse_error; Optional<FailedTextParseDiagnostic> header_parse_error;
for (auto const& child : element->as_element().children) { for (auto const& child : element->as_element().children) {
@ -312,9 +307,11 @@ void SpecificationClause::parse(XML::Node const* element)
m_subclauses.append(create(ctx, child)); m_subclauses.append(create(ctx, child));
return; return;
} }
if (header_parse_error.has_value()) { if (!node_ignored_warning_issued && m_header.header.has<AK::Empty>()) {
node_ignored_warning_issued = true;
ctx.diag().warn(ctx.location_from_xml_offset(child->offset), ctx.diag().warn(ctx.location_from_xml_offset(child->offset),
"node content will be ignored since section header was not parsed successfully"); "node content will be ignored since section header was not parsed successfully");
if (header_parse_error.has_value())
ctx.diag().note(header_parse_error->location, "{}", header_parse_error->message); ctx.diag().note(header_parse_error->location, "{}", header_parse_error->message);
} }
} }

View file

@ -16,15 +16,6 @@ bool contains_empty_text(XML::Node const* node)
return node->as_text().builder.string_view().trim_whitespace().is_empty(); return node->as_text().builder.string_view().trim_whitespace().is_empty();
} }
ParseErrorOr<StringView> deprecated_get_attribute_by_name(XML::Node const* node, StringView attribute_name)
{
auto const& attribute = node->as_element().attributes.get(attribute_name);
if (!attribute.has_value())
return ParseError::create(String::formatted("Attribute {} is not present", attribute_name), node);
return attribute.value();
}
Optional<StringView> get_attribute_by_name(XML::Node const* node, StringView attribute_name) Optional<StringView> get_attribute_by_name(XML::Node const* node, StringView attribute_name)
{ {
auto const& attribute = node->as_element().attributes.get(attribute_name); auto const& attribute = node->as_element().attributes.get(attribute_name);
@ -34,39 +25,34 @@ Optional<StringView> get_attribute_by_name(XML::Node const* node, StringView att
return attribute.value(); return attribute.value();
} }
ParseErrorOr<StringView> get_text_contents(XML::Node const* node) Optional<StringView> get_text_contents(XML::Node const* node)
{ {
auto const& children = node->as_element().children; auto const& children = node->as_element().children;
if (children.size() != 1 || !children[0]->is_text()) if (children.size() != 1 || !children[0]->is_text())
return ParseError::create("Expected single text node in a child list of the node"sv, node); return {};
return children[0]->as_text().builder.string_view(); return children[0]->as_text().builder.string_view();
} }
ParseErrorOr<XML::Node const*> get_only_child(XML::Node const* element, StringView tag_name) Optional<XML::Node const*> get_single_child_with_tag(XML::Node const* element, StringView tag_name)
{ {
XML::Node const* result = nullptr; XML::Node const* result = nullptr;
for (auto const& child : element->as_element().children) { for (auto const& child : element->as_element().children) {
TRY(child->content.visit( auto is_valid = child->content.visit(
[&](XML::Node::Element const& element) -> ParseErrorOr<void> { [&](XML::Node::Element const& element) {
if (element.name != tag_name)
return ParseError::create(String::formatted("Expected child with the tag name {} but found {}", tag_name, element.name), child);
if (result != nullptr)
return ParseError::create("Element must have only one child"sv, child);
result = child; result = child;
return {}; return result != nullptr || element.name != tag_name;
}, },
[&](XML::Node::Text const&) -> ParseErrorOr<void> { [&](XML::Node::Text const&) {
if (!contains_empty_text(child)) return contains_empty_text(child);
return ParseError::create("Element should not have non-empty child text nodes"sv, element);
return {};
}, },
move(ignore_comments))); [&](auto const&) { return true; });
if (!is_valid)
return {};
} }
if (result == nullptr) if (result == nullptr)
return ParseError::create(String::formatted("Element must have only one child"), element); return {};
return result; return result;
} }

View file

@ -20,11 +20,10 @@ inline constexpr IgnoreComments ignore_comments {};
bool contains_empty_text(XML::Node const* node); bool contains_empty_text(XML::Node const* node);
ParseErrorOr<StringView> deprecated_get_attribute_by_name(XML::Node const* node, StringView attribute_name);
Optional<StringView> get_attribute_by_name(XML::Node const* node, StringView attribute_name); Optional<StringView> get_attribute_by_name(XML::Node const* node, StringView attribute_name);
ParseErrorOr<StringView> get_text_contents(XML::Node const* node); Optional<StringView> get_text_contents(XML::Node const* node);
ParseErrorOr<XML::Node const*> get_only_child(XML::Node const* element, StringView tag_name); Optional<XML::Node const*> get_single_child_with_tag(XML::Node const* element, StringView tag_name);
} }