diff --git a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Forward.h b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Forward.h
index da75213ceb..4323b06abb 100644
--- a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Forward.h
+++ b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Forward.h
@@ -59,6 +59,7 @@ class ControlFlowGraph;
 class RecursiveASTVisitor;
 
 // Parser/SpecParser.h
+class SpecificationParsingContext;
 class AlgorithmStep;
 class AlgorithmStepList;
 class Algorithm;
diff --git a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Lexer.cpp b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Lexer.cpp
index 7a06fa6dd2..34b0e47c7a 100644
--- a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Lexer.cpp
+++ b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Lexer.cpp
@@ -4,16 +4,17 @@
  * SPDX-License-Identifier: BSD-2-Clause
  */
 
-#include 
 #include 
+#include 
 
 #include "Parser/Lexer.h"
+#include "Parser/SpecParser.h"
 #include "Parser/XMLUtils.h"
 
 namespace JSSpecCompiler {
 
 namespace {
 
-Optional<Token> consume_number(GenericLexer& lexer, XML::Node const* node)
+Optional<Token> consume_number(XML::LineTrackingLexer& lexer, XML::Node const* node, Location& location)
 {
     u64 start = lexer.tell();
 
@@ -35,7 +36,7 @@ Optional<Token> consume_number(GenericLexer& lexer, XML::Node const* node)
     auto length = lexer.tell() - start;
     lexer.retreat(length);
 
-    return { Token { TokenType::Number, lexer.consume(length), node } };
+    return { Token { TokenType::Number, lexer.consume(length), node, move(location) } };
 }
 
 bool can_end_word_token(char c)
@@ -44,56 +45,68 @@ bool can_end_word_token(char c)
 }
 
 }
-ParseErrorOr<void> tokenize_string(XML::Node const* node, StringView view, Vector<Token>& tokens)
+ParseErrorOr<void> tokenize_string(SpecificationParsingContext& ctx, XML::Node const* node, StringView view, Vector<Token>& tokens)
 {
-#define CONSUME_IF_NEXT(view, type)                                      \
-    if (lexer.next_is(view##sv)) {                                       \
-        size_t length = __builtin_strlen(view);                          \
-        tokens.append({ TokenType::type, lexer.consume(length), node }); \
-        continue;                                                        \
-    }
+    static constexpr struct {
+        StringView text_to_match;
+        TokenType token_type;
+    } choices[] = {
+        { "-"sv, TokenType::AmbiguousMinus },
+        { "}"sv, TokenType::BraceClose },
+        { "{"sv, TokenType::BraceOpen },
+        { ":"sv, TokenType::Colon },
+        { ","sv, TokenType::Comma },
+        { "/"sv, TokenType::Division },
+        { ". "sv, TokenType::Dot },
+        { ".\n"sv, TokenType::Dot },
+        { "="sv, TokenType::Equals },
+        { "is equal to"sv, TokenType::Equals },
+        { "!"sv, TokenType::ExclamationMark },
+        { ">"sv, TokenType::Greater },
+        { "is"sv, TokenType::Is },
+        { "<"sv, TokenType::Less },
+        { "."sv, TokenType::MemberAccess },
+        { "×"sv, TokenType::Multiplication },
+        { "is not equal to"sv, TokenType::NotEquals },
+        { "≠"sv, TokenType::NotEquals },
+        { ")"sv, TokenType::ParenClose },
+        { "("sv, TokenType::ParenOpen },
+        { "+"sv, TokenType::Plus },
+    };
+
+    XML::LineTrackingLexer lexer(view, node->offset);
 
-    GenericLexer lexer(view);
     while (!lexer.is_eof()) {
         lexer.ignore_while(is_ascii_space);
 
-        if (auto result = consume_number(lexer, node); result.has_value()) {
+        // FIXME: This is incorrect since we count text offset after XML reference resolution. To do
+        //        this properly, we need support from XML::Parser.
+        Location token_location = ctx.location_from_xml_offset(lexer.offset_for(lexer.tell()));
+
+        if (auto result = consume_number(lexer, node, token_location); result.has_value()) {
             tokens.append(result.release_value());
             continue;
         }
 
-        CONSUME_IF_NEXT("(", ParenOpen);
-        CONSUME_IF_NEXT(")", ParenClose);
-        CONSUME_IF_NEXT("{", BraceOpen);
-        CONSUME_IF_NEXT("}", BraceClose);
-        CONSUME_IF_NEXT(",", Comma);
-        CONSUME_IF_NEXT(". ", Dot);
-        CONSUME_IF_NEXT(".\n", Dot);
-        CONSUME_IF_NEXT(":", Colon);
-        CONSUME_IF_NEXT(".", MemberAccess);
-        CONSUME_IF_NEXT("<", Less);
-        CONSUME_IF_NEXT(">", Greater);
-        CONSUME_IF_NEXT("is not equal to", NotEquals);
-        CONSUME_IF_NEXT("≠", NotEquals);
-        CONSUME_IF_NEXT("is equal to", Equals);
-        CONSUME_IF_NEXT("=", Equals);
-        CONSUME_IF_NEXT("+", Plus);
-        CONSUME_IF_NEXT("-", AmbiguousMinus);
-        CONSUME_IF_NEXT("×", Multiplication);
-        CONSUME_IF_NEXT("/", Division);
-        CONSUME_IF_NEXT("!", ExclamationMark);
-        CONSUME_IF_NEXT("is", Is);
+        bool matched = false;
+        for (auto const& [text_to_match, token_type] : choices) {
+            if (lexer.consume_specific(text_to_match)) {
+                tokens.append({ token_type, ""sv, node, move(token_location) });
+                matched = true;
+                break;
+            }
+        }
+        if (matched)
+            continue;
 
         StringView word = lexer.consume_until(can_end_word_token);
         if (word.length())
-            tokens.append({ TokenType::Word, word, node });
+            tokens.append({ TokenType::Word, word, node, move(token_location) });
     }
     return {};
-
-#undef CONSUME_IF_NEXT
 }
 
-ParseErrorOr<TokenizeTreeResult> tokenize_tree(XML::Node const* node, bool allow_substeps)
+ParseErrorOr<TokenizeTreeResult> tokenize_tree(SpecificationParsingContext& ctx, XML::Node const* node, bool allow_substeps)
 {
     TokenizeTreeResult result;
     auto& tokens = result.tokens;
@@ -104,8 +117,10 @@ ParseErrorOr<TokenizeTreeResult> tokenize_tree(XML::Node const* node, bool allow
                 if (result.substeps != nullptr)
                     return ParseError::create("Substeps list must be the last non-empty child"sv, child);
 
+                Location child_location = ctx.location_from_xml_offset(child->offset);
+
                 if (element.name == tag_var) {
-                    tokens.append({ TokenType::Identifier, TRY(get_text_contents(child)), child });
+                    tokens.append({ TokenType::Identifier, TRY(get_text_contents(child)), child, move(child_location) });
                     return {};
                 }
 
@@ -113,24 +128,24 @@ ParseErrorOr<TokenizeTreeResult> tokenize_tree(XML::Node const* node, bool allow
                     auto element_class = TRY(deprecated_get_attribute_by_name(child, attribute_class));
                     if (element_class != class_secnum)
                         return ParseError::create(String::formatted("Expected 'secnum' as a class name of <span>, but found '{}'", element_class), child);
-                    tokens.append({ TokenType::SectionNumber, TRY(get_text_contents(child)), child });
+                    tokens.append({ TokenType::SectionNumber, TRY(get_text_contents(child)), child, move(child_location) });
                     return {};
                 }
 
                 if (element.name == tag_emu_val) {
                     auto contents = TRY(get_text_contents(child));
                     if (contents.length() >= 2 && contents.starts_with('"') && contents.ends_with('"'))
-                        tokens.append({ TokenType::String, contents.substring_view(1, contents.length() - 2), child });
+                        tokens.append({ TokenType::String, contents.substring_view(1, contents.length() - 2), child, move(child_location) });
                     else if (contents == "undefined")
-                        tokens.append({ TokenType::Undefined, contents, child });
+                        tokens.append({ TokenType::Undefined, contents, child, move(child_location) });
                     else
-                        tokens.append({ TokenType::Identifier, contents, child });
+                        tokens.append({ TokenType::Identifier, contents, child, move(child_location) });
                     return {};
                 }
 
                 if (element.name == tag_emu_xref) {
                     auto contents = TRY(get_text_contents(TRY(get_only_child(child, "a"sv))));
-                    tokens.append({ TokenType::Identifier, contents, child });
+                    tokens.append({ TokenType::Identifier, contents, child, move(child_location) });
                     return {};
                 }
 
@@ -147,7 +162,7 @@ ParseErrorOr<TokenizeTreeResult> tokenize_tree(XML::Node const* node, bool allow
                 auto view = text.builder.string_view();
                 if (result.substeps && !contains_empty_text(child))
                     return ParseError::create("Substeps list must be the last non-empty child"sv, child);
-                return tokenize_string(child, view, tokens);
+                return tokenize_string(ctx, child, view, tokens);
             },
             move(ignore_comments)));
     }
diff --git a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Lexer.h b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Lexer.h
index 9f9695d36d..3756309a39 100644
--- a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Lexer.h
+++ b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Lexer.h
@@ -31,13 +31,13 @@ inline constexpr StringView attribute_id = "id"sv;
 
 inline constexpr StringView class_secnum = "secnum"sv;
 
-ParseErrorOr<void> tokenize_string(XML::Node const* node, StringView view, Vector<Token>& tokens);
+ParseErrorOr<void> tokenize_string(SpecificationParsingContext& ctx, XML::Node const* node, StringView view, Vector<Token>& tokens);
 
 struct TokenizeTreeResult {
     Vector<Token> tokens;
     XML::Node const* substeps = nullptr;
 };
 
-ParseErrorOr<TokenizeTreeResult> tokenize_tree(XML::Node const* node, bool allow_substeps = false);
+ParseErrorOr<TokenizeTreeResult> tokenize_tree(SpecificationParsingContext& ctx, XML::Node const* node, bool allow_substeps = false);
 
 }
diff --git a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/SpecParser.cpp b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/SpecParser.cpp
index 4a7779a9f1..663d9ecb59 100644
--- a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/SpecParser.cpp
+++ b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/SpecParser.cpp
@@ -64,7 +64,7 @@ Optional<AlgorithmStep> AlgorithmStep::create(SpecificationParsingContext& ctx,
 {
     VERIFY(element->as_element().name == tag_li);
 
-    auto tokenization_result = tokenize_tree(element, true);
+    auto tokenization_result = tokenize_tree(ctx, element, true);
     if (tokenization_result.is_error()) {
         ctx.diag().error(ctx.location_from_xml_offset(tokenization_result.error()->offset()),
             "{}", tokenization_result.error()->to_string());
@@ -253,7 +253,7 @@ void SpecificationClause::collect_into(TranslationUnitRef translation_unit)
 ParseErrorOr<void> SpecificationClause::parse_header(XML::Node const* element)
 {
     VERIFY(element->as_element().name == tag_h1);
-    auto tokens = TRY(tokenize_tree(element));
+    auto tokens = TRY(tokenize_tree(*m_ctx_pointer, element));
     TextParser parser(tokens.tokens, element);
 
     m_header = TRY(parser.parse_clause_header());
     return {};
diff --git a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/TextParser.cpp b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/TextParser.cpp
index 5ace444ea1..21ae23e8f7 100644
--- a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/TextParser.cpp
+++ b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/TextParser.cpp
@@ -226,7 +226,7 @@ ParseErrorOr<Tree> TextParser::parse_expression()
 
         if (token.type == TokenType::ParenOpen) {
             if (last_element_type == ExpressionType)
-                stack.append(Token { TokenType::FunctionCall, ""sv, m_node });
+                stack.append(Token { TokenType::FunctionCall, ""sv, token.node, token.location });
             stack.append(token);
 
             if (m_next_token_index + 1 < m_tokens.size()
diff --git a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Token.h b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Token.h
index 24d723b2e7..e968079b4c 100644
--- a/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Token.h
+++ b/Meta/Lagom/Tools/CodeGenerators/JSSpecCompiler/Parser/Token.h
@@ -9,6 +9,7 @@
 #include 
 
 #include "AST/AST.h"
+#include "DiagnosticEngine.h"
 
 namespace JSSpecCompiler {
 
@@ -46,7 +47,6 @@ constexpr i32 closing_bracket_precedence = 18;
     F(Multiplication, 5, Invalid, Multiplication, Invalid)   \
     F(Division, 5, Invalid, Division, Invalid)                \
     F(FunctionCall, 2, Invalid, FunctionCall, Invalid)        \
-    F(ArraySubscript, 2, Invalid, ArraySubscript, Invalid)    \
     F(ExclamationMark, 3, AssertCompletion, Invalid, Invalid) \
     F(Is, -1, Invalid, Invalid, Invalid)
 
@@ -110,6 +110,7 @@ struct Token {
     TokenType type;
     StringView data;
     XML::Node const* node;
+    Location location;
 };
 
 }
diff --git a/Userland/Libraries/LibXML/Parser/Parser.h b/Userland/Libraries/LibXML/Parser/Parser.h
index cf71d63852..3529d08ede 100644
--- a/Userland/Libraries/LibXML/Parser/Parser.h
+++ b/Userland/Libraries/LibXML/Parser/Parser.h
@@ -39,10 +39,21 @@ struct Listener {
     virtual void error(ParseError const&) { }
 };
 
+// FIXME: This is also used in JSSpecCompiler, so should probably live in AK or even merged with
+//        AK::GenericLexer.
 class LineTrackingLexer : public GenericLexer {
 public:
     using GenericLexer::GenericLexer;
 
+    LineTrackingLexer(StringView input, XML::Offset start_offset)
+        : GenericLexer(input)
+        , m_cached_offset {
+            .line = start_offset.line,
+            .column = start_offset.column,
+        }
+    {
+    }
+
     Offset cached_offset() const { return m_cached_offset; }
     void restore_cached_offset(Offset cached_offset) { m_cached_offset = cached_offset; }
     Offset offset_for(size_t) const;