Mirror of https://github.com/RGBCube/serenity (synced 2025-10-31 21:52:45 +00:00)
			
		
		
		
	JSSpecCompiler: Get rid of ParseError in Lexer
This commit is contained in:
		
							parent
							
								
									ed04aff1de
								
							
						
					
					
						commit
						9a2337f7ad
					
				
					 5 changed files with 125 additions and 88 deletions
				
			
		|  | @ -43,9 +43,8 @@ bool can_end_word_token(char c) | ||||||
| { | { | ||||||
|     return is_ascii_space(c) || ".,"sv.contains(c); |     return is_ascii_space(c) || ".,"sv.contains(c); | ||||||
| } | } | ||||||
| } |  | ||||||
| 
 | 
 | ||||||
| ParseErrorOr<void> tokenize_string(SpecificationParsingContext& ctx, XML::Node const* node, StringView view, Vector<Token>& tokens) | void tokenize_string(SpecificationParsingContext& ctx, XML::Node const* node, StringView view, Vector<Token>& tokens) | ||||||
| { | { | ||||||
|     static constexpr struct { |     static constexpr struct { | ||||||
|         StringView text_to_match; |         StringView text_to_match; | ||||||
|  | @ -103,74 +102,131 @@ ParseErrorOr<void> tokenize_string(SpecificationParsingContext& ctx, XML::Node c | ||||||
|         if (word.length()) |         if (word.length()) | ||||||
|             tokens.append({ TokenType::Word, word, node, move(token_location) }); |             tokens.append({ TokenType::Word, word, node, move(token_location) }); | ||||||
|     } |     } | ||||||
|     return {}; |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| ParseErrorOr<TokenizeTreeResult> tokenize_tree(SpecificationParsingContext& ctx, XML::Node const* node, bool allow_substeps) | enum class TreeType { | ||||||
|  |     AlgorithmStep, | ||||||
|  |     Header, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | struct TokenizerState { | ||||||
|  |     Vector<Token> tokens; | ||||||
|  |     XML::Node const* substeps = nullptr; | ||||||
|  |     bool has_errors = false; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | void tokenize_tree(SpecificationParsingContext& ctx, TokenizerState& state, XML::Node const* node, TreeType tree_type) | ||||||
| { | { | ||||||
|     TokenizeTreeResult result; |     // FIXME: Use structured binding once macOS Lagom CI updates to Clang >= 16.
 | ||||||
|     auto& tokens = result.tokens; |     auto& tokens = state.tokens; | ||||||
|  |     auto& substeps = state.substeps; | ||||||
|  |     auto& has_errors = state.has_errors; | ||||||
| 
 | 
 | ||||||
|     for (auto const& child : node->as_element().children) { |     for (auto const& child : node->as_element().children) { | ||||||
|         TRY(child->content.visit( |         if (has_errors) | ||||||
|             [&](XML::Node::Element const& element) -> ParseErrorOr<void> { |             break; | ||||||
|                 if (result.substeps != nullptr) |  | ||||||
|                     return ParseError::create("Substeps list must be the last non-empty child"sv, child); |  | ||||||
| 
 | 
 | ||||||
|  |         child->content.visit( | ||||||
|  |             [&](XML::Node::Element const& element) -> void { | ||||||
|                 Location child_location = ctx.location_from_xml_offset(child->offset); |                 Location child_location = ctx.location_from_xml_offset(child->offset); | ||||||
|  |                 auto report_error = [&]<typename... Parameters>(AK::CheckedFormatString<Parameters...>&& fmt, Parameters const&... parameters) { | ||||||
|  |                     ctx.diag().error(child_location, move(fmt), parameters...); | ||||||
|  |                     has_errors = true; | ||||||
|  |                 }; | ||||||
| 
 | 
 | ||||||
|                 if (element.name == tag_var) { |                 if (substeps) { | ||||||
|                     tokens.append({ TokenType::Identifier, TRY(get_text_contents(child)), child, move(child_location) }); |                     report_error("substeps list must be the last child of algorithm step"); | ||||||
|                     return {}; |                     return; | ||||||
|                 } |                 } | ||||||
| 
 | 
 | ||||||
|                 if (element.name == tag_span) { |                 if (element.name == tag_var) { | ||||||
|                     auto element_class = TRY(deprecated_get_attribute_by_name(child, attribute_class)); |                     auto variable_name = get_text_contents(child); | ||||||
|                     if (element_class != class_secnum) |                     if (!variable_name.has_value()) | ||||||
|                         return ParseError::create(String::formatted("Expected 'secnum' as a class name of <span>, but found '{}'", element_class), child); |                         report_error("malformed <var> subtree, expected single text child node"); | ||||||
|                     tokens.append({ TokenType::SectionNumber, TRY(get_text_contents(child)), child, move(child_location) }); | 
 | ||||||
|                     return {}; |                     tokens.append({ TokenType::Identifier, variable_name.value_or(""sv), child, move(child_location) }); | ||||||
|  |                     return; | ||||||
|                 } |                 } | ||||||
| 
 | 
 | ||||||
|                 if (element.name == tag_emu_val) { |                 if (element.name == tag_emu_val) { | ||||||
|                     auto contents = TRY(get_text_contents(child)); |                     auto maybe_contents = get_text_contents(child); | ||||||
|  |                     if (!maybe_contents.has_value()) | ||||||
|  |                         report_error("malformed <emu-val> subtree, expected single text child node"); | ||||||
|  | 
 | ||||||
|  |                     auto contents = maybe_contents.value_or(""sv); | ||||||
|  | 
 | ||||||
|                     if (contents.length() >= 2 && contents.starts_with('"') && contents.ends_with('"')) |                     if (contents.length() >= 2 && contents.starts_with('"') && contents.ends_with('"')) | ||||||
|                         tokens.append({ TokenType::String, contents.substring_view(1, contents.length() - 2), child, move(child_location) }); |                         tokens.append({ TokenType::String, contents.substring_view(1, contents.length() - 2), child, move(child_location) }); | ||||||
|                     else if (contents == "undefined") |                     else if (contents == "undefined") | ||||||
|                         tokens.append({ TokenType::Undefined, contents, child, move(child_location) }); |                         tokens.append({ TokenType::Undefined, contents, child, move(child_location) }); | ||||||
|                     else |                     else | ||||||
|                         tokens.append({ TokenType::Identifier, contents, child, move(child_location) }); |                         tokens.append({ TokenType::Identifier, contents, child, move(child_location) }); | ||||||
|                     return {}; |                     return; | ||||||
|                 } |                 } | ||||||
| 
 | 
 | ||||||
|                 if (element.name == tag_emu_xref) { |                 if (element.name == tag_emu_xref) { | ||||||
|                     auto contents = TRY(get_text_contents(TRY(get_only_child(child, "a"sv)))); |                     auto identifier = get_single_child_with_tag(child, "a"sv).map([](XML::Node const* node) { | ||||||
|                     tokens.append({ TokenType::Identifier, contents, child, move(child_location) }); |                         return get_text_contents(node).value_or(""sv); | ||||||
|                     return {}; |                     }); | ||||||
|  |                     if (!identifier.has_value() || identifier.value().is_empty()) | ||||||
|  |                         report_error("malformed <emu-xref> subtree, expected <a> with nested single text node"); | ||||||
|  | 
 | ||||||
|  |                     tokens.append({ TokenType::Identifier, identifier.value_or(""sv), child, move(child_location) }); | ||||||
|  |                     return; | ||||||
|                 } |                 } | ||||||
| 
 | 
 | ||||||
|                 if (element.name == tag_ol) { |                 if (tree_type == TreeType::Header && element.name == tag_span) { | ||||||
|                     if (!allow_substeps) |                     auto element_class = get_attribute_by_name(child, attribute_class); | ||||||
|                         return ParseError::create("Found nested list but substeps are not allowed"sv, child); |                     if (element_class != class_secnum) | ||||||
|                     result.substeps = child; |                         report_error("expected <span> to have class='secnum' attribute"); | ||||||
|                     return {}; | 
 | ||||||
|  |                     auto section_number = get_text_contents(child); | ||||||
|  |                     if (!section_number.has_value()) | ||||||
|  |                         report_error("malformed section number span subtree, expected single text child node"); | ||||||
|  | 
 | ||||||
|  |                     tokens.append({ TokenType::SectionNumber, section_number.value_or(""sv), child, move(child_location) }); | ||||||
|  |                     return; | ||||||
|                 } |                 } | ||||||
| 
 | 
 | ||||||
|                 return ParseError::create(String::formatted("Unexpected child element with tag {}", element.name), child); |                 if (tree_type == TreeType::AlgorithmStep && element.name == tag_ol) { | ||||||
|  |                     substeps = child; | ||||||
|  |                     return; | ||||||
|  |                 } | ||||||
|  | 
 | ||||||
|  |                 report_error("<{}> should not be a child of algorithm step", element.name); | ||||||
|             }, |             }, | ||||||
|             [&](XML::Node::Text const& text) -> ParseErrorOr<void> { |             [&](XML::Node::Text const& text) { | ||||||
|                 auto view = text.builder.string_view(); |                 auto view = text.builder.string_view(); | ||||||
|                 if (result.substeps && !contains_empty_text(child)) |                 if (substeps != nullptr && !contains_empty_text(child)) { | ||||||
|                     return ParseError::create("Substeps list must be the last non-empty child"sv, child); |                     ctx.diag().error(ctx.location_from_xml_offset(child->offset), | ||||||
|                 return tokenize_string(ctx, child, view, tokens); |                         "substeps list must be the last child of algorithm step"); | ||||||
|  |                 } else { | ||||||
|  |                     tokenize_string(ctx, child, view, tokens); | ||||||
|  |                 } | ||||||
|             }, |             }, | ||||||
|             move(ignore_comments))); |             [&](auto const&) {}); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     if (tokens.size() && tokens.last().type == TokenType::MemberAccess) |     if (tokens.size() && tokens.last().type == TokenType::MemberAccess) | ||||||
|         tokens.last().type = TokenType::Dot; |         tokens.last().type = TokenType::Dot; | ||||||
|  | } | ||||||
|  | } | ||||||
| 
 | 
 | ||||||
|     return result; | StepTokenizationResult tokenize_step(SpecificationParsingContext& ctx, XML::Node const* node) | ||||||
|  | { | ||||||
|  |     TokenizerState state; | ||||||
|  |     tokenize_tree(ctx, state, node, TreeType::AlgorithmStep); | ||||||
|  |     return { | ||||||
|  |         .tokens = state.has_errors ? OptionalNone {} : Optional<Vector<Token>> { move(state.tokens) }, | ||||||
|  |         .substeps = state.substeps, | ||||||
|  |     }; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | Optional<Vector<Token>> tokenize_header(SpecificationParsingContext& ctx, XML::Node const* node) | ||||||
|  | { | ||||||
|  |     TokenizerState state; | ||||||
|  |     tokenize_tree(ctx, state, node, TreeType::Header); | ||||||
|  |     return state.has_errors ? OptionalNone {} : Optional<Vector<Token>> { state.tokens }; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -31,13 +31,12 @@ inline constexpr StringView attribute_id = "id"sv; | ||||||
| 
 | 
 | ||||||
| inline constexpr StringView class_secnum = "secnum"sv; | inline constexpr StringView class_secnum = "secnum"sv; | ||||||
| 
 | 
 | ||||||
| ParseErrorOr<void> tokenize_string(SpecificationParsingContext& ctx, XML::Node const* node, StringView view, Vector<Token>& tokens); | struct StepTokenizationResult { | ||||||
| 
 |     Optional<Vector<Token>> tokens; | ||||||
| struct TokenizeTreeResult { |  | ||||||
|     Vector<Token> tokens; |  | ||||||
|     XML::Node const* substeps = nullptr; |     XML::Node const* substeps = nullptr; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| ParseErrorOr<TokenizeTreeResult> tokenize_tree(SpecificationParsingContext& ctx, XML::Node const* node, bool allow_substeps = false); | StepTokenizationResult tokenize_step(SpecificationParsingContext& ctx, XML::Node const* node); | ||||||
|  | Optional<Vector<Token>> tokenize_header(SpecificationParsingContext& ctx, XML::Node const* node); | ||||||
| 
 | 
 | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -64,16 +64,9 @@ Optional<AlgorithmStep> AlgorithmStep::create(SpecificationParsingContext& ctx, | ||||||
| { | { | ||||||
|     VERIFY(element->as_element().name == tag_li); |     VERIFY(element->as_element().name == tag_li); | ||||||
| 
 | 
 | ||||||
|     auto tokenization_result = tokenize_tree(ctx, element, true); |     auto [maybe_tokens, substeps] = tokenize_step(ctx, element); | ||||||
|     if (tokenization_result.is_error()) { |  | ||||||
|         ctx.diag().error(ctx.location_from_xml_offset(tokenization_result.error()->offset()), |  | ||||||
|             "{}", tokenization_result.error()->to_string()); |  | ||||||
|         return {}; |  | ||||||
|     } |  | ||||||
| 
 | 
 | ||||||
|     auto [tokens, substeps] = tokenization_result.release_value(); |  | ||||||
|     AlgorithmStep result(ctx); |     AlgorithmStep result(ctx); | ||||||
|     result.m_tokens = move(tokens); |  | ||||||
|     result.m_node = element; |     result.m_node = element; | ||||||
| 
 | 
 | ||||||
|     if (substeps) { |     if (substeps) { | ||||||
|  | @ -86,6 +79,10 @@ Optional<AlgorithmStep> AlgorithmStep::create(SpecificationParsingContext& ctx, | ||||||
|         result.m_substeps = step_list.has_value() ? step_list->tree() : error_tree; |         result.m_substeps = step_list.has_value() ? step_list->tree() : error_tree; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     if (!maybe_tokens.has_value()) | ||||||
|  |         return {}; | ||||||
|  |     result.m_tokens = maybe_tokens.release_value(); | ||||||
|  | 
 | ||||||
|     if (!result.parse()) |     if (!result.parse()) | ||||||
|         return {}; |         return {}; | ||||||
|     return result; |     return result; | ||||||
|  | @ -260,14 +257,11 @@ Optional<FailedTextParseDiagnostic> SpecificationClause::parse_header(XML::Node | ||||||
|     auto& ctx = *m_ctx_pointer; |     auto& ctx = *m_ctx_pointer; | ||||||
|     VERIFY(element->as_element().name == tag_h1); |     VERIFY(element->as_element().name == tag_h1); | ||||||
| 
 | 
 | ||||||
|     auto tokenization_result = tokenize_tree(ctx, element, false); |     auto maybe_tokens = tokenize_header(ctx, element); | ||||||
|     if (tokenization_result.is_error()) { |     if (!maybe_tokens.has_value()) | ||||||
|         return FailedTextParseDiagnostic { |         return {}; | ||||||
|             ctx.location_from_xml_offset(tokenization_result.error()->offset()), | 
 | ||||||
|             tokenization_result.error()->to_string() |     auto const& tokens = maybe_tokens.release_value(); | ||||||
|         }; |  | ||||||
|     } |  | ||||||
|     auto const& tokens = tokenization_result.release_value().tokens; |  | ||||||
| 
 | 
 | ||||||
|     TextParser parser(ctx, tokens, element); |     TextParser parser(ctx, tokens, element); | ||||||
|     auto parse_result = parser.parse_clause_header(); |     auto parse_result = parser.parse_clause_header(); | ||||||
|  | @ -289,6 +283,7 @@ void SpecificationClause::parse(XML::Node const* element) | ||||||
|     auto& ctx = context(); |     auto& ctx = context(); | ||||||
|     u32 child_index = 0; |     u32 child_index = 0; | ||||||
| 
 | 
 | ||||||
|  |     bool node_ignored_warning_issued = false; | ||||||
|     Optional<FailedTextParseDiagnostic> header_parse_error; |     Optional<FailedTextParseDiagnostic> header_parse_error; | ||||||
| 
 | 
 | ||||||
|     for (auto const& child : element->as_element().children) { |     for (auto const& child : element->as_element().children) { | ||||||
|  | @ -312,10 +307,12 @@ void SpecificationClause::parse(XML::Node const* element) | ||||||
|                         m_subclauses.append(create(ctx, child)); |                         m_subclauses.append(create(ctx, child)); | ||||||
|                         return; |                         return; | ||||||
|                     } |                     } | ||||||
|                     if (header_parse_error.has_value()) { |                     if (!node_ignored_warning_issued && m_header.header.has<AK::Empty>()) { | ||||||
|  |                         node_ignored_warning_issued = true; | ||||||
|                         ctx.diag().warn(ctx.location_from_xml_offset(child->offset), |                         ctx.diag().warn(ctx.location_from_xml_offset(child->offset), | ||||||
|                             "node content will be ignored since section header was not parsed successfully"); |                             "node content will be ignored since section header was not parsed successfully"); | ||||||
|                         ctx.diag().note(header_parse_error->location, "{}", header_parse_error->message); |                         if (header_parse_error.has_value()) | ||||||
|  |                             ctx.diag().note(header_parse_error->location, "{}", header_parse_error->message); | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
|                 ++child_index; |                 ++child_index; | ||||||
|  |  | ||||||
|  | @ -16,15 +16,6 @@ bool contains_empty_text(XML::Node const* node) | ||||||
|     return node->as_text().builder.string_view().trim_whitespace().is_empty(); |     return node->as_text().builder.string_view().trim_whitespace().is_empty(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| ParseErrorOr<StringView> deprecated_get_attribute_by_name(XML::Node const* node, StringView attribute_name) |  | ||||||
| { |  | ||||||
|     auto const& attribute = node->as_element().attributes.get(attribute_name); |  | ||||||
| 
 |  | ||||||
|     if (!attribute.has_value()) |  | ||||||
|         return ParseError::create(String::formatted("Attribute {} is not present", attribute_name), node); |  | ||||||
|     return attribute.value(); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| Optional<StringView> get_attribute_by_name(XML::Node const* node, StringView attribute_name) | Optional<StringView> get_attribute_by_name(XML::Node const* node, StringView attribute_name) | ||||||
| { | { | ||||||
|     auto const& attribute = node->as_element().attributes.get(attribute_name); |     auto const& attribute = node->as_element().attributes.get(attribute_name); | ||||||
|  | @ -34,39 +25,34 @@ Optional<StringView> get_attribute_by_name(XML::Node const* node, StringView att | ||||||
|     return attribute.value(); |     return attribute.value(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| ParseErrorOr<StringView> get_text_contents(XML::Node const* node) | Optional<StringView> get_text_contents(XML::Node const* node) | ||||||
| { | { | ||||||
|     auto const& children = node->as_element().children; |     auto const& children = node->as_element().children; | ||||||
| 
 |  | ||||||
|     if (children.size() != 1 || !children[0]->is_text()) |     if (children.size() != 1 || !children[0]->is_text()) | ||||||
|         return ParseError::create("Expected single text node in a child list of the node"sv, node); |         return {}; | ||||||
|     return children[0]->as_text().builder.string_view(); |     return children[0]->as_text().builder.string_view(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| ParseErrorOr<XML::Node const*> get_only_child(XML::Node const* element, StringView tag_name) | Optional<XML::Node const*> get_single_child_with_tag(XML::Node const* element, StringView tag_name) | ||||||
| { | { | ||||||
|     XML::Node const* result = nullptr; |     XML::Node const* result = nullptr; | ||||||
| 
 | 
 | ||||||
|     for (auto const& child : element->as_element().children) { |     for (auto const& child : element->as_element().children) { | ||||||
|         TRY(child->content.visit( |         auto is_valid = child->content.visit( | ||||||
|             [&](XML::Node::Element const& element) -> ParseErrorOr<void> { |             [&](XML::Node::Element const& element) { | ||||||
|                 if (element.name != tag_name) |  | ||||||
|                     return ParseError::create(String::formatted("Expected child with the tag name {} but found {}", tag_name, element.name), child); |  | ||||||
|                 if (result != nullptr) |  | ||||||
|                     return ParseError::create("Element must have only one child"sv, child); |  | ||||||
|                 result = child; |                 result = child; | ||||||
|                 return {}; |                 return result != nullptr || element.name != tag_name; | ||||||
|             }, |             }, | ||||||
|             [&](XML::Node::Text const&) -> ParseErrorOr<void> { |             [&](XML::Node::Text const&) { | ||||||
|                 if (!contains_empty_text(child)) |                 return contains_empty_text(child); | ||||||
|                     return ParseError::create("Element should not have non-empty child text nodes"sv, element); |  | ||||||
|                 return {}; |  | ||||||
|             }, |             }, | ||||||
|             move(ignore_comments))); |             [&](auto const&) { return true; }); | ||||||
|  |         if (!is_valid) | ||||||
|  |             return {}; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     if (result == nullptr) |     if (result == nullptr) | ||||||
|         return ParseError::create(String::formatted("Element must have only one child"), element); |         return {}; | ||||||
|     return result; |     return result; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -20,11 +20,10 @@ inline constexpr IgnoreComments ignore_comments {}; | ||||||
| 
 | 
 | ||||||
| bool contains_empty_text(XML::Node const* node); | bool contains_empty_text(XML::Node const* node); | ||||||
| 
 | 
 | ||||||
| ParseErrorOr<StringView> deprecated_get_attribute_by_name(XML::Node const* node, StringView attribute_name); |  | ||||||
| Optional<StringView> get_attribute_by_name(XML::Node const* node, StringView attribute_name); | Optional<StringView> get_attribute_by_name(XML::Node const* node, StringView attribute_name); | ||||||
| 
 | 
 | ||||||
| ParseErrorOr<StringView> get_text_contents(XML::Node const* node); | Optional<StringView> get_text_contents(XML::Node const* node); | ||||||
| 
 | 
 | ||||||
| ParseErrorOr<XML::Node const*> get_only_child(XML::Node const* element, StringView tag_name); | Optional<XML::Node const*> get_single_child_with_tag(XML::Node const* element, StringView tag_name); | ||||||
| 
 | 
 | ||||||
| } | } | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Dan Klishch
						Dan Klishch