mirror of
				https://github.com/RGBCube/serenity
				synced 2025-10-31 12:32:43 +00:00 
			
		
		
		
	JSSpecCompiler: Get rid of ParseError in Lexer
This commit is contained in:
		
							parent
							
								
									ed04aff1de
								
							
						
					
					
						commit
						9a2337f7ad
					
				
					 5 changed files with 125 additions and 88 deletions
				
			
		|  | @ -43,9 +43,8 @@ bool can_end_word_token(char c) | |||
| { | ||||
|     return is_ascii_space(c) || ".,"sv.contains(c); | ||||
| } | ||||
| } | ||||
| 
 | ||||
| ParseErrorOr<void> tokenize_string(SpecificationParsingContext& ctx, XML::Node const* node, StringView view, Vector<Token>& tokens) | ||||
| void tokenize_string(SpecificationParsingContext& ctx, XML::Node const* node, StringView view, Vector<Token>& tokens) | ||||
| { | ||||
|     static constexpr struct { | ||||
|         StringView text_to_match; | ||||
|  | @ -103,74 +102,131 @@ ParseErrorOr<void> tokenize_string(SpecificationParsingContext& ctx, XML::Node c | |||
|         if (word.length()) | ||||
|             tokens.append({ TokenType::Word, word, node, move(token_location) }); | ||||
|     } | ||||
|     return {}; | ||||
| } | ||||
| 
 | ||||
| ParseErrorOr<TokenizeTreeResult> tokenize_tree(SpecificationParsingContext& ctx, XML::Node const* node, bool allow_substeps) | ||||
| enum class TreeType { | ||||
|     AlgorithmStep, | ||||
|     Header, | ||||
| }; | ||||
| 
 | ||||
| struct TokenizerState { | ||||
|     Vector<Token> tokens; | ||||
|     XML::Node const* substeps = nullptr; | ||||
|     bool has_errors = false; | ||||
| }; | ||||
| 
 | ||||
| void tokenize_tree(SpecificationParsingContext& ctx, TokenizerState& state, XML::Node const* node, TreeType tree_type) | ||||
| { | ||||
|     TokenizeTreeResult result; | ||||
|     auto& tokens = result.tokens; | ||||
|     // FIXME: Use structured binding once macOS Lagom CI updates to Clang >= 16.
 | ||||
|     auto& tokens = state.tokens; | ||||
|     auto& substeps = state.substeps; | ||||
|     auto& has_errors = state.has_errors; | ||||
| 
 | ||||
|     for (auto const& child : node->as_element().children) { | ||||
|         TRY(child->content.visit( | ||||
|             [&](XML::Node::Element const& element) -> ParseErrorOr<void> { | ||||
|                 if (result.substeps != nullptr) | ||||
|                     return ParseError::create("Substeps list must be the last non-empty child"sv, child); | ||||
|         if (has_errors) | ||||
|             break; | ||||
| 
 | ||||
|         child->content.visit( | ||||
|             [&](XML::Node::Element const& element) -> void { | ||||
|                 Location child_location = ctx.location_from_xml_offset(child->offset); | ||||
|                 auto report_error = [&]<typename... Parameters>(AK::CheckedFormatString<Parameters...>&& fmt, Parameters const&... parameters) { | ||||
|                     ctx.diag().error(child_location, move(fmt), parameters...); | ||||
|                     has_errors = true; | ||||
|                 }; | ||||
| 
 | ||||
|                 if (element.name == tag_var) { | ||||
|                     tokens.append({ TokenType::Identifier, TRY(get_text_contents(child)), child, move(child_location) }); | ||||
|                     return {}; | ||||
|                 if (substeps) { | ||||
|                     report_error("substeps list must be the last child of algorithm step"); | ||||
|                     return; | ||||
|                 } | ||||
| 
 | ||||
|                 if (element.name == tag_span) { | ||||
|                     auto element_class = TRY(deprecated_get_attribute_by_name(child, attribute_class)); | ||||
|                     if (element_class != class_secnum) | ||||
|                         return ParseError::create(String::formatted("Expected 'secnum' as a class name of <span>, but found '{}'", element_class), child); | ||||
|                     tokens.append({ TokenType::SectionNumber, TRY(get_text_contents(child)), child, move(child_location) }); | ||||
|                     return {}; | ||||
|                 if (element.name == tag_var) { | ||||
|                     auto variable_name = get_text_contents(child); | ||||
|                     if (!variable_name.has_value()) | ||||
|                         report_error("malformed <var> subtree, expected single text child node"); | ||||
| 
 | ||||
|                     tokens.append({ TokenType::Identifier, variable_name.value_or(""sv), child, move(child_location) }); | ||||
|                     return; | ||||
|                 } | ||||
| 
 | ||||
|                 if (element.name == tag_emu_val) { | ||||
|                     auto contents = TRY(get_text_contents(child)); | ||||
|                     auto maybe_contents = get_text_contents(child); | ||||
|                     if (!maybe_contents.has_value()) | ||||
|                         report_error("malformed <emu-val> subtree, expected single text child node"); | ||||
| 
 | ||||
|                     auto contents = maybe_contents.value_or(""sv); | ||||
| 
 | ||||
|                     if (contents.length() >= 2 && contents.starts_with('"') && contents.ends_with('"')) | ||||
|                         tokens.append({ TokenType::String, contents.substring_view(1, contents.length() - 2), child, move(child_location) }); | ||||
|                     else if (contents == "undefined") | ||||
|                         tokens.append({ TokenType::Undefined, contents, child, move(child_location) }); | ||||
|                     else | ||||
|                         tokens.append({ TokenType::Identifier, contents, child, move(child_location) }); | ||||
|                     return {}; | ||||
|                     return; | ||||
|                 } | ||||
| 
 | ||||
|                 if (element.name == tag_emu_xref) { | ||||
|                     auto contents = TRY(get_text_contents(TRY(get_only_child(child, "a"sv)))); | ||||
|                     tokens.append({ TokenType::Identifier, contents, child, move(child_location) }); | ||||
|                     return {}; | ||||
|                     auto identifier = get_single_child_with_tag(child, "a"sv).map([](XML::Node const* node) { | ||||
|                         return get_text_contents(node).value_or(""sv); | ||||
|                     }); | ||||
|                     if (!identifier.has_value() || identifier.value().is_empty()) | ||||
|                         report_error("malformed <emu-xref> subtree, expected <a> with nested single text node"); | ||||
| 
 | ||||
|                     tokens.append({ TokenType::Identifier, identifier.value_or(""sv), child, move(child_location) }); | ||||
|                     return; | ||||
|                 } | ||||
| 
 | ||||
|                 if (element.name == tag_ol) { | ||||
|                     if (!allow_substeps) | ||||
|                         return ParseError::create("Found nested list but substeps are not allowed"sv, child); | ||||
|                     result.substeps = child; | ||||
|                     return {}; | ||||
|                 if (tree_type == TreeType::Header && element.name == tag_span) { | ||||
|                     auto element_class = get_attribute_by_name(child, attribute_class); | ||||
|                     if (element_class != class_secnum) | ||||
|                         report_error("expected <span> to have class='secnum' attribute"); | ||||
| 
 | ||||
|                     auto section_number = get_text_contents(child); | ||||
|                     if (!section_number.has_value()) | ||||
|                         report_error("malformed section number span subtree, expected single text child node"); | ||||
| 
 | ||||
|                     tokens.append({ TokenType::SectionNumber, section_number.value_or(""sv), child, move(child_location) }); | ||||
|                     return; | ||||
|                 } | ||||
| 
 | ||||
|                 return ParseError::create(String::formatted("Unexpected child element with tag {}", element.name), child); | ||||
|                 if (tree_type == TreeType::AlgorithmStep && element.name == tag_ol) { | ||||
|                     substeps = child; | ||||
|                     return; | ||||
|                 } | ||||
| 
 | ||||
|                 report_error("<{}> should not be a child of algorithm step", element.name); | ||||
|             }, | ||||
|             [&](XML::Node::Text const& text) -> ParseErrorOr<void> { | ||||
|             [&](XML::Node::Text const& text) { | ||||
|                 auto view = text.builder.string_view(); | ||||
|                 if (result.substeps && !contains_empty_text(child)) | ||||
|                     return ParseError::create("Substeps list must be the last non-empty child"sv, child); | ||||
|                 return tokenize_string(ctx, child, view, tokens); | ||||
|                 if (substeps != nullptr && !contains_empty_text(child)) { | ||||
|                     ctx.diag().error(ctx.location_from_xml_offset(child->offset), | ||||
|                         "substeps list must be the last child of algorithm step"); | ||||
|                 } else { | ||||
|                     tokenize_string(ctx, child, view, tokens); | ||||
|                 } | ||||
|             }, | ||||
|             move(ignore_comments))); | ||||
|             [&](auto const&) {}); | ||||
|     } | ||||
| 
 | ||||
|     if (tokens.size() && tokens.last().type == TokenType::MemberAccess) | ||||
|         tokens.last().type = TokenType::Dot; | ||||
| } | ||||
| } | ||||
| 
 | ||||
|     return result; | ||||
| StepTokenizationResult tokenize_step(SpecificationParsingContext& ctx, XML::Node const* node) | ||||
| { | ||||
|     TokenizerState state; | ||||
|     tokenize_tree(ctx, state, node, TreeType::AlgorithmStep); | ||||
|     return { | ||||
|         .tokens = state.has_errors ? OptionalNone {} : Optional<Vector<Token>> { move(state.tokens) }, | ||||
|         .substeps = state.substeps, | ||||
|     }; | ||||
| } | ||||
| 
 | ||||
| Optional<Vector<Token>> tokenize_header(SpecificationParsingContext& ctx, XML::Node const* node) | ||||
| { | ||||
|     TokenizerState state; | ||||
|     tokenize_tree(ctx, state, node, TreeType::Header); | ||||
|     return state.has_errors ? OptionalNone {} : Optional<Vector<Token>> { state.tokens }; | ||||
| } | ||||
| 
 | ||||
| } | ||||
|  |  | |||
|  | @ -31,13 +31,12 @@ inline constexpr StringView attribute_id = "id"sv; | |||
| 
 | ||||
| inline constexpr StringView class_secnum = "secnum"sv; | ||||
| 
 | ||||
| ParseErrorOr<void> tokenize_string(SpecificationParsingContext& ctx, XML::Node const* node, StringView view, Vector<Token>& tokens); | ||||
| 
 | ||||
| struct TokenizeTreeResult { | ||||
|     Vector<Token> tokens; | ||||
| struct StepTokenizationResult { | ||||
|     Optional<Vector<Token>> tokens; | ||||
|     XML::Node const* substeps = nullptr; | ||||
| }; | ||||
| 
 | ||||
| ParseErrorOr<TokenizeTreeResult> tokenize_tree(SpecificationParsingContext& ctx, XML::Node const* node, bool allow_substeps = false); | ||||
| StepTokenizationResult tokenize_step(SpecificationParsingContext& ctx, XML::Node const* node); | ||||
| Optional<Vector<Token>> tokenize_header(SpecificationParsingContext& ctx, XML::Node const* node); | ||||
| 
 | ||||
| } | ||||
|  |  | |||
|  | @ -64,16 +64,9 @@ Optional<AlgorithmStep> AlgorithmStep::create(SpecificationParsingContext& ctx, | |||
| { | ||||
|     VERIFY(element->as_element().name == tag_li); | ||||
| 
 | ||||
|     auto tokenization_result = tokenize_tree(ctx, element, true); | ||||
|     if (tokenization_result.is_error()) { | ||||
|         ctx.diag().error(ctx.location_from_xml_offset(tokenization_result.error()->offset()), | ||||
|             "{}", tokenization_result.error()->to_string()); | ||||
|         return {}; | ||||
|     } | ||||
|     auto [maybe_tokens, substeps] = tokenize_step(ctx, element); | ||||
| 
 | ||||
|     auto [tokens, substeps] = tokenization_result.release_value(); | ||||
|     AlgorithmStep result(ctx); | ||||
|     result.m_tokens = move(tokens); | ||||
|     result.m_node = element; | ||||
| 
 | ||||
|     if (substeps) { | ||||
|  | @ -86,6 +79,10 @@ Optional<AlgorithmStep> AlgorithmStep::create(SpecificationParsingContext& ctx, | |||
|         result.m_substeps = step_list.has_value() ? step_list->tree() : error_tree; | ||||
|     } | ||||
| 
 | ||||
|     if (!maybe_tokens.has_value()) | ||||
|         return {}; | ||||
|     result.m_tokens = maybe_tokens.release_value(); | ||||
| 
 | ||||
|     if (!result.parse()) | ||||
|         return {}; | ||||
|     return result; | ||||
|  | @ -260,14 +257,11 @@ Optional<FailedTextParseDiagnostic> SpecificationClause::parse_header(XML::Node | |||
|     auto& ctx = *m_ctx_pointer; | ||||
|     VERIFY(element->as_element().name == tag_h1); | ||||
| 
 | ||||
|     auto tokenization_result = tokenize_tree(ctx, element, false); | ||||
|     if (tokenization_result.is_error()) { | ||||
|         return FailedTextParseDiagnostic { | ||||
|             ctx.location_from_xml_offset(tokenization_result.error()->offset()), | ||||
|             tokenization_result.error()->to_string() | ||||
|         }; | ||||
|     } | ||||
|     auto const& tokens = tokenization_result.release_value().tokens; | ||||
|     auto maybe_tokens = tokenize_header(ctx, element); | ||||
|     if (!maybe_tokens.has_value()) | ||||
|         return {}; | ||||
| 
 | ||||
|     auto const& tokens = maybe_tokens.release_value(); | ||||
| 
 | ||||
|     TextParser parser(ctx, tokens, element); | ||||
|     auto parse_result = parser.parse_clause_header(); | ||||
|  | @ -289,6 +283,7 @@ void SpecificationClause::parse(XML::Node const* element) | |||
|     auto& ctx = context(); | ||||
|     u32 child_index = 0; | ||||
| 
 | ||||
|     bool node_ignored_warning_issued = false; | ||||
|     Optional<FailedTextParseDiagnostic> header_parse_error; | ||||
| 
 | ||||
|     for (auto const& child : element->as_element().children) { | ||||
|  | @ -312,10 +307,12 @@ void SpecificationClause::parse(XML::Node const* element) | |||
|                         m_subclauses.append(create(ctx, child)); | ||||
|                         return; | ||||
|                     } | ||||
|                     if (header_parse_error.has_value()) { | ||||
|                     if (!node_ignored_warning_issued && m_header.header.has<AK::Empty>()) { | ||||
|                         node_ignored_warning_issued = true; | ||||
|                         ctx.diag().warn(ctx.location_from_xml_offset(child->offset), | ||||
|                             "node content will be ignored since section header was not parsed successfully"); | ||||
|                         ctx.diag().note(header_parse_error->location, "{}", header_parse_error->message); | ||||
|                         if (header_parse_error.has_value()) | ||||
|                             ctx.diag().note(header_parse_error->location, "{}", header_parse_error->message); | ||||
|                     } | ||||
|                 } | ||||
|                 ++child_index; | ||||
|  |  | |||
|  | @ -16,15 +16,6 @@ bool contains_empty_text(XML::Node const* node) | |||
|     return node->as_text().builder.string_view().trim_whitespace().is_empty(); | ||||
| } | ||||
| 
 | ||||
| ParseErrorOr<StringView> deprecated_get_attribute_by_name(XML::Node const* node, StringView attribute_name) | ||||
| { | ||||
|     auto const& attribute = node->as_element().attributes.get(attribute_name); | ||||
| 
 | ||||
|     if (!attribute.has_value()) | ||||
|         return ParseError::create(String::formatted("Attribute {} is not present", attribute_name), node); | ||||
|     return attribute.value(); | ||||
| } | ||||
| 
 | ||||
| Optional<StringView> get_attribute_by_name(XML::Node const* node, StringView attribute_name) | ||||
| { | ||||
|     auto const& attribute = node->as_element().attributes.get(attribute_name); | ||||
|  | @ -34,39 +25,34 @@ Optional<StringView> get_attribute_by_name(XML::Node const* node, StringView att | |||
|     return attribute.value(); | ||||
| } | ||||
| 
 | ||||
| ParseErrorOr<StringView> get_text_contents(XML::Node const* node) | ||||
| Optional<StringView> get_text_contents(XML::Node const* node) | ||||
| { | ||||
|     auto const& children = node->as_element().children; | ||||
| 
 | ||||
|     if (children.size() != 1 || !children[0]->is_text()) | ||||
|         return ParseError::create("Expected single text node in a child list of the node"sv, node); | ||||
|         return {}; | ||||
|     return children[0]->as_text().builder.string_view(); | ||||
| } | ||||
| 
 | ||||
| ParseErrorOr<XML::Node const*> get_only_child(XML::Node const* element, StringView tag_name) | ||||
| Optional<XML::Node const*> get_single_child_with_tag(XML::Node const* element, StringView tag_name) | ||||
| { | ||||
|     XML::Node const* result = nullptr; | ||||
| 
 | ||||
|     for (auto const& child : element->as_element().children) { | ||||
|         TRY(child->content.visit( | ||||
|             [&](XML::Node::Element const& element) -> ParseErrorOr<void> { | ||||
|                 if (element.name != tag_name) | ||||
|                     return ParseError::create(String::formatted("Expected child with the tag name {} but found {}", tag_name, element.name), child); | ||||
|                 if (result != nullptr) | ||||
|                     return ParseError::create("Element must have only one child"sv, child); | ||||
|         auto is_valid = child->content.visit( | ||||
|             [&](XML::Node::Element const& element) { | ||||
|                 result = child; | ||||
|                 return {}; | ||||
|                 return result != nullptr || element.name != tag_name; | ||||
|             }, | ||||
|             [&](XML::Node::Text const&) -> ParseErrorOr<void> { | ||||
|                 if (!contains_empty_text(child)) | ||||
|                     return ParseError::create("Element should not have non-empty child text nodes"sv, element); | ||||
|                 return {}; | ||||
|             [&](XML::Node::Text const&) { | ||||
|                 return contains_empty_text(child); | ||||
|             }, | ||||
|             move(ignore_comments))); | ||||
|             [&](auto const&) { return true; }); | ||||
|         if (!is_valid) | ||||
|             return {}; | ||||
|     } | ||||
| 
 | ||||
|     if (result == nullptr) | ||||
|         return ParseError::create(String::formatted("Element must have only one child"), element); | ||||
|         return {}; | ||||
|     return result; | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -20,11 +20,10 @@ inline constexpr IgnoreComments ignore_comments {}; | |||
| 
 | ||||
| bool contains_empty_text(XML::Node const* node); | ||||
| 
 | ||||
| ParseErrorOr<StringView> deprecated_get_attribute_by_name(XML::Node const* node, StringView attribute_name); | ||||
| Optional<StringView> get_attribute_by_name(XML::Node const* node, StringView attribute_name); | ||||
| 
 | ||||
| ParseErrorOr<StringView> get_text_contents(XML::Node const* node); | ||||
| Optional<StringView> get_text_contents(XML::Node const* node); | ||||
| 
 | ||||
| ParseErrorOr<XML::Node const*> get_only_child(XML::Node const* element, StringView tag_name); | ||||
| Optional<XML::Node const*> get_single_child_with_tag(XML::Node const* element, StringView tag_name); | ||||
| 
 | ||||
| } | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Dan Klishch
						Dan Klishch