From 8505fcb8ae313792d32550bffec0a72f86e60102 Mon Sep 17 00:00:00 2001
From: Itamar
Date: Wed, 11 Aug 2021 22:31:43 +0300
Subject: [PATCH] LibCpp: Understand preprocessor macro definition and invocation

The preprocessor now understands when a function-like macro is defined,
and can also parse calls to such macros.

The actual evaluation of function-like macros will be done in a separate
commit.
---
 .../Cpp/CppComprehensionEngine.cpp            |   6 +-
 Userland/Libraries/LibCpp/Preprocessor.cpp    | 145 +++++++++++++++---
 Userland/Libraries/LibCpp/Preprocessor.h      |  24 ++-
 Userland/Utilities/cpp-preprocessor.cpp       |  11 ++
 4 files changed, 155 insertions(+), 31 deletions(-)

diff --git a/Userland/DevTools/HackStudio/LanguageServers/Cpp/CppComprehensionEngine.cpp b/Userland/DevTools/HackStudio/LanguageServers/Cpp/CppComprehensionEngine.cpp
index 2f171a3012..22640c3a95 100644
--- a/Userland/DevTools/HackStudio/LanguageServers/Cpp/CppComprehensionEngine.cpp
+++ b/Userland/DevTools/HackStudio/LanguageServers/Cpp/CppComprehensionEngine.cpp
@@ -165,7 +165,7 @@ Vector<GUI::AutocompleteProvider::Entry> CppComprehensionEngine::autocomplete_na
     if (reference_scope.is_empty()) {
         for (auto& preprocessor_name : document.preprocessor().definitions().keys()) {
             if (preprocessor_name.starts_with(partial_text)) {
-                suggestions.append({ preprocessor_name.to_string(), partial_text.length(), GUI::AutocompleteProvider::CompletionKind::PreprocessorDefinition });
+                suggestions.append({ preprocessor_name, partial_text.length(), GUI::AutocompleteProvider::CompletionKind::PreprocessorDefinition });
             }
         }
     }
@@ -413,9 +413,9 @@ Optional<GUI::AutocompleteProvider::ProjectLocation> CppComprehensionEngine::fin
 
     // Search for a replaced preprocessor token that intersects with text_position
     for (auto& substitution : document.preprocessor().substitutions()) {
-        if (substitution.original_token.start() > cpp_position)
+        if (substitution.original_tokens.first().start() > cpp_position)
             continue;
-        if (substitution.original_token.end() < cpp_position)
+        if (substitution.original_tokens.first().end() < cpp_position)
             continue;
 
         return GUI::AutocompleteProvider::ProjectLocation { substitution.defined_value.filename, substitution.defined_value.line, substitution.defined_value.column };
diff --git a/Userland/Libraries/LibCpp/Preprocessor.cpp b/Userland/Libraries/LibCpp/Preprocessor.cpp
index ca2a28987b..002f668993 100644
--- a/Userland/Libraries/LibCpp/Preprocessor.cpp
+++ b/Userland/Libraries/LibCpp/Preprocessor.cpp
@@ -140,18 +140,9 @@ void Preprocessor::handle_preprocessor_keyword(const StringView& keyword, Generi
 
     if (keyword == "define") {
         if (m_state == State::Normal) {
-            auto key = line_lexer.consume_until(' ');
-            consume_whitespace(line_lexer);
-
-            DefinedValue value;
-            value.filename = m_filename;
-            value.line = m_line_index;
-
-            auto string_value = line_lexer.consume_all();
-            if (!string_value.is_empty())
-                value.value = string_value;
-
-            m_definitions.set(key, value);
+            auto definition = create_definition(line_lexer.consume_all());
+            if (definition.has_value())
+                m_definitions.set(definition->key, *definition);
         }
         return;
     }
@@ -230,14 +221,15 @@ void Preprocessor::handle_preprocessor_keyword(const StringView& keyword, Generi
 void Preprocessor::process_line(StringView const& line)
 {
     Lexer line_lexer { line, m_line_index };
+    line_lexer.set_ignore_whitespace(true);
     auto tokens = line_lexer.lex();
 
-    for (auto& token : tokens) {
-        if (token.type() == Token::Type::Whitespace)
-            continue;
+    for (size_t i = 0; i < tokens.size(); ++i) {
+        auto& token = tokens[i];
         if (token.type() == Token::Type::Identifier) {
             if (auto defined_value = m_definitions.find(token.text()); defined_value != m_definitions.end()) {
-                do_substitution(token, defined_value->value);
+                auto last_substituted_token_index = do_substitution(tokens, i, defined_value->value);
+                i = last_substituted_token_index;
                 continue;
             }
         }
@@ -245,21 +237,128 @@ void Preprocessor::process_line(StringView const& line)
     }
 }
 
-void Preprocessor::do_substitution(Token const& replaced_token, DefinedValue const& defined_value)
+size_t Preprocessor::do_substitution(Vector<Token> const& tokens, size_t token_index, Definition const& defined_value)
 {
-    m_substitutions.append({ replaced_token, defined_value });
-
     if (defined_value.value.is_null())
-        return;
+        return token_index;
 
-    Lexer lexer(m_substitutions.last().defined_value.value);
+    Substitution sub;
+    sub.defined_value = defined_value;
+
+    auto macro_call = parse_macro_call(tokens, token_index);
+
+    if (!macro_call.has_value())
+        return token_index;
+
+    // TODO: Evaluate macro call
+    auto processed_value = defined_value.value;
+    Vector<Token> original_tokens;
+    for (size_t i = token_index; i <= macro_call->end_token_index; ++i) {
+        original_tokens.append(tokens[i]);
+    }
+    VERIFY(!original_tokens.is_empty());
+
+    m_substitutions.append({ original_tokens, defined_value, processed_value });
+
+    Lexer lexer(processed_value);
     for (auto& token : lexer.lex()) {
         if (token.type() == Token::Type::Whitespace)
             continue;
-        token.set_start(replaced_token.start());
-        token.set_end(replaced_token.end());
+        token.set_start(original_tokens.first().start());
+        token.set_end(original_tokens.first().end());
         m_tokens.append(token);
     }
+    return macro_call->end_token_index;
+}
+
+Optional<Preprocessor::MacroCall> Preprocessor::parse_macro_call(Vector<Token> const& tokens, size_t token_index)
+{
+    auto name = tokens[token_index];
+    ++token_index;
+
+    if (token_index >= tokens.size() || tokens[token_index].type() != Token::Type::LeftParen)
+        return MacroCall { name, {}, token_index - 1 };
+    ++token_index;
+
+    Vector<MacroCall::Argument> arguments;
+    MacroCall::Argument current_argument;
+
+    size_t paren_depth = 1;
+    for (; token_index < tokens.size(); ++token_index) {
+        auto& token = tokens[token_index];
+        if (token.type() == Token::Type::LeftParen)
+            ++paren_depth;
+        if (token.type() == Token::Type::RightParen)
+            --paren_depth;
+
+        if (paren_depth == 0) {
+            arguments.append(move(current_argument));
+            break;
+        }
+
+        if (paren_depth == 1 && token.type() == Token::Type::Comma) {
+            arguments.append(move(current_argument));
+            current_argument = {};
+        } else {
+            current_argument.tokens.append(token);
+        }
+    }
+
+    if (token_index >= tokens.size())
+        return {};
+
+    return MacroCall { name, move(arguments), token_index };
+}
+
+Optional<Preprocessor::Definition> Preprocessor::create_definition(StringView line)
+{
+    Lexer lexer { line };
+    lexer.set_ignore_whitespace(true);
+    auto tokens = lexer.lex();
+    if (tokens.is_empty())
+        return {};
+
+    if (tokens.first().type() != Token::Type::Identifier)
+        return {};
+
+    Definition definition;
+    definition.filename = m_filename;
+    definition.line = m_line_index;
+
+    definition.key = tokens.first().text();
+
+    if (tokens.size() == 1)
+        return definition;
+
+    size_t token_index = 1;
+    // Parse macro parameters (if any)
+    if (tokens[token_index].type() == Token::Type::LeftParen) {
+        ++token_index;
+        while (token_index < tokens.size() && tokens[token_index].type() != Token::Type::RightParen) {
+            auto param = tokens[token_index];
+            if (param.type() != Token::Type::Identifier)
+                return {};
+
+            if (token_index + 1 >= tokens.size())
+                return {};
+
+            ++token_index;
+
+            if (tokens[token_index].type() == Token::Type::Comma)
+                ++token_index;
+            else if (tokens[token_index].type() != Token::Type::RightParen)
+                return {};
+
+            definition.parameters.empend(param.text());
+        }
+        if (token_index >= tokens.size())
+            return {};
+        ++token_index;
+    }
+
+    definition.value = line.substring_view(tokens[token_index].start().column);
+
+    return definition;
 }
 
 };
diff --git a/Userland/Libraries/LibCpp/Preprocessor.h b/Userland/Libraries/LibCpp/Preprocessor.h
index 91410d6590..f91ee965f3 100644
--- a/Userland/Libraries/LibCpp/Preprocessor.h
+++ b/Userland/Libraries/LibCpp/Preprocessor.h
@@ -24,17 +24,20 @@ public:
     Vector<Token> process_and_lex();
     Vector<StringView> included_paths() const { return m_included_paths; }
 
-    struct DefinedValue {
+    struct Definition {
+        String key;
+        Vector<String> parameters;
         String value;
         FlyString filename;
         size_t line { 0 };
         size_t column { 0 };
     };
-    using Definitions = HashMap<StringView, DefinedValue>;
+    using Definitions = HashMap<String, Definition>;
 
     struct Substitution {
-        Token original_token;
-        DefinedValue defined_value;
+        Vector<Token> original_tokens;
+        Definition defined_value;
+        String processed_value;
     };
 
     Definitions const& definitions() const { return m_definitions; }
@@ -50,7 +53,18 @@ private:
     PreprocessorKeyword handle_preprocessor_line(StringView const&);
     void handle_preprocessor_keyword(StringView const& keyword, GenericLexer& line_lexer);
     void process_line(StringView const& line);
-    void do_substitution(Token const& replaced_token, DefinedValue const&);
+    size_t do_substitution(Vector<Token> const& tokens, size_t token_index, Definition const&);
+    Optional<Definition> create_definition(StringView line);
+
+    struct MacroCall {
+        Token name;
+        struct Argument {
+            Vector<Token> tokens;
+        };
+        Vector<Argument> arguments;
+        size_t end_token_index { 0 };
+    };
+    Optional<MacroCall> parse_macro_call(Vector<Token> const& tokens, size_t token_index);
 
     String m_filename;
     String m_program;
diff --git a/Userland/Utilities/cpp-preprocessor.cpp b/Userland/Utilities/cpp-preprocessor.cpp
index 0069756681..fd60f3a156 100644
--- a/Userland/Utilities/cpp-preprocessor.cpp
+++ b/Userland/Utilities/cpp-preprocessor.cpp
@@ -17,6 +17,17 @@ int main(int, char**)
     auto content = file->read_all();
     Cpp::Preprocessor cpp("other.h", StringView { content });
     auto tokens = cpp.process_and_lex();
+
+    outln("Definitions:");
+    for (auto& definition : cpp.definitions()) {
+        if (definition.value.parameters.is_empty())
+            outln("{}: {}", definition.key, definition.value.value);
+        else
+            outln("{}({}): {}", definition.key, String::join(",", definition.value.parameters), definition.value.value);
+    }
+
+    outln("");
+
     for (auto& token : tokens) {
         dbgln("{}", token.to_string());
     }