mirror of
				https://github.com/RGBCube/serenity
				synced 2025-10-26 08:32:07 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			371 lines
		
	
	
	
		
			9.7 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			371 lines
		
	
	
	
		
			9.7 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /*
 | |
|  * Copyright (c) 2023, Sam Atkins <atkinssj@serenityos.org>
 | |
|  *
 | |
|  * SPDX-License-Identifier: BSD-2-Clause
 | |
|  */
 | |
| 
 | |
| #include "Lexer.h"
 | |
| #include <AK/CharacterTypes.h>
 | |
| #include <AK/Debug.h>
 | |
| #include <AK/Format.h>
 | |
| #include <AK/ScopeLogger.h>
 | |
| 
 | |
| namespace CMake {
 | |
| 
 | |
| static bool is_valid_identifier_initial_char(char c)
 | |
| {
 | |
|     return is_ascii_alpha(c) || c == '_';
 | |
| }
 | |
| 
 | |
| static bool is_valid_identifier_char(char c)
 | |
| {
 | |
|     return is_ascii_alphanumeric(c) || c == '_';
 | |
| }
 | |
| 
 | |
| ErrorOr<Vector<Token>> Lexer::lex(StringView input)
 | |
| {
 | |
|     Lexer lexer { input };
 | |
|     return lexer.lex_file();
 | |
| }
 | |
| 
 | |
| Lexer::Lexer(StringView input)
 | |
|     : GenericLexer(input)
 | |
| {
 | |
| }
 | |
| 
 | |
| ErrorOr<Vector<Token>> Lexer::lex_file()
 | |
| {
 | |
|     m_tokens.clear_with_capacity();
 | |
| 
 | |
|     while (!is_eof()) {
 | |
|         consume_whitespace_or_comments();
 | |
| 
 | |
|         if (is_eof())
 | |
|             break;
 | |
| 
 | |
|         if (is_valid_identifier_initial_char(peek())) {
 | |
|             consume_command_invocation();
 | |
|         } else {
 | |
|             consume_garbage();
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return m_tokens;
 | |
| }
 | |
| 
 | |
| void Lexer::skip_whitespace()
 | |
| {
 | |
|     while (!is_eof()) {
 | |
|         if (next_is('\n')) {
 | |
|             next_line();
 | |
|             continue;
 | |
|         }
 | |
|         auto consumed = consume_while([&](char c) {
 | |
|             return c == ' ' || c == '\t';
 | |
|         });
 | |
|         if (consumed.is_empty())
 | |
|             break;
 | |
|     }
 | |
| }
 | |
| 
 | |
| void Lexer::consume_whitespace_or_comments()
 | |
| {
 | |
|     ScopeLogger<CMAKE_DEBUG> log;
 | |
|     while (!is_eof()) {
 | |
|         skip_whitespace();
 | |
| 
 | |
|         if (next_is('#')) {
 | |
|             consume_comment();
 | |
|         } else {
 | |
|             break;
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| // https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#command-invocations
 | |
| void Lexer::consume_command_invocation()
 | |
| {
 | |
|     ScopeLogger<CMAKE_DEBUG> log;
 | |
|     auto identifier_start = position();
 | |
|     auto identifier = consume_while(is_valid_identifier_char);
 | |
|     auto control_keyword = control_keyword_from_string(identifier);
 | |
|     if (control_keyword.has_value()) {
 | |
|         emit_token(Token::Type::ControlKeyword, identifier, identifier_start, position(), control_keyword.release_value());
 | |
|     } else {
 | |
|         emit_token(Token::Type::Identifier, identifier, identifier_start, position());
 | |
|     }
 | |
| 
 | |
|     consume_whitespace_or_comments();
 | |
| 
 | |
|     if (next_is('('))
 | |
|         consume_open_paren();
 | |
| 
 | |
|     consume_arguments();
 | |
| 
 | |
|     if (next_is(')'))
 | |
|         consume_close_paren();
 | |
| }
 | |
| 
 | |
| void Lexer::consume_arguments()
 | |
| {
 | |
|     ScopeLogger<CMAKE_DEBUG> log;
 | |
|     while (!is_eof()) {
 | |
|         consume_whitespace_or_comments();
 | |
| 
 | |
|         if (next_is('(')) {
 | |
|             consume_open_paren();
 | |
| 
 | |
|             consume_whitespace_or_comments();
 | |
|             consume_arguments();
 | |
|             consume_whitespace_or_comments();
 | |
| 
 | |
|             if (next_is(')'))
 | |
|                 consume_close_paren();
 | |
| 
 | |
|             continue;
 | |
|         }
 | |
| 
 | |
|         if (next_is(')'))
 | |
|             return;
 | |
| 
 | |
|         consume_argument();
 | |
|     }
 | |
| }
 | |
| 
 | |
| // https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#command-arguments
 | |
| void Lexer::consume_argument()
 | |
| {
 | |
|     ScopeLogger<CMAKE_DEBUG> log;
 | |
|     consume_whitespace_or_comments();
 | |
| 
 | |
|     if (next_is('[')) {
 | |
|         consume_bracket_argument();
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     if (next_is('"')) {
 | |
|         consume_quoted_argument();
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     consume_unquoted_argument();
 | |
| }
 | |
| 
 | |
| // https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#bracket-argument
 | |
| void Lexer::consume_bracket_argument()
 | |
| {
 | |
|     ScopeLogger<CMAKE_DEBUG> log;
 | |
|     auto start = position();
 | |
|     auto value = read_bracket_argument();
 | |
|     emit_token(Token::Type::BracketArgument, value, start, position());
 | |
| }
 | |
| 
 | |
| // https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#quoted-argument
 | |
| void Lexer::consume_quoted_argument()
 | |
| {
 | |
|     ScopeLogger<CMAKE_DEBUG> log;
 | |
|     auto start = position();
 | |
|     auto start_offset = tell();
 | |
| 
 | |
|     VERIFY(consume_specific('"'));
 | |
|     while (!is_eof()) {
 | |
|         if (next_is('"')) {
 | |
|             ignore();
 | |
|             break;
 | |
|         }
 | |
| 
 | |
|         if (next_is("\\\""sv)) {
 | |
|             ignore(2);
 | |
|             continue;
 | |
|         }
 | |
| 
 | |
|         if (next_is('\n')) {
 | |
|             next_line();
 | |
|             continue;
 | |
|         }
 | |
| 
 | |
|         ignore();
 | |
|     }
 | |
| 
 | |
|     auto whole_token = m_input.substring_view(start_offset, tell() - start_offset);
 | |
|     auto value = whole_token.substring_view(1, whole_token.length() - 2);
 | |
|     auto variable_references = parse_variable_references_from_argument(whole_token, start);
 | |
|     emit_token(Token::Type::QuotedArgument, value, start, position(), {}, move(variable_references));
 | |
| }
 | |
| 
 | |
| // https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#unquoted-argument
 | |
| void Lexer::consume_unquoted_argument()
 | |
| {
 | |
|     ScopeLogger<CMAKE_DEBUG> log;
 | |
|     auto start_offset = tell();
 | |
|     auto start = position();
 | |
| 
 | |
|     while (!is_eof()) {
 | |
|         if (next_is('\\')) {
 | |
|             consume_escaped_character('\\');
 | |
|             continue;
 | |
|         }
 | |
| 
 | |
|         auto consumed = consume_until([](char c) { return is_ascii_space(c) || "()#\"\\'"sv.contains(c); });
 | |
|         if (consumed.is_empty())
 | |
|             break;
 | |
| 
 | |
|         // FIXME: `unquoted_legacy`
 | |
|     }
 | |
| 
 | |
|     auto value = m_input.substring_view(start_offset, tell() - start_offset);
 | |
|     auto variable_references = parse_variable_references_from_argument(value, start);
 | |
|     emit_token(Token::Type::UnquotedArgument, value, start, position(), {}, move(variable_references));
 | |
| }
 | |
| 
 | |
| // https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#comments
 | |
| void Lexer::consume_comment()
 | |
| {
 | |
|     ScopeLogger<CMAKE_DEBUG> log;
 | |
|     auto start = position();
 | |
| 
 | |
|     VERIFY(consume_specific('#'));
 | |
|     if (next_is('[')) {
 | |
|         // Bracket comment
 | |
|         // https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#bracket-comment
 | |
|         auto comment = read_bracket_argument();
 | |
|         emit_token(Token::Type::BracketComment, comment, start, position());
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     // Line comment
 | |
|     // https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#line-comment
 | |
|     auto comment = consume_until('\n');
 | |
|     emit_token(Token::Type::LineComment, comment, start, position());
 | |
| }
 | |
| 
 | |
| void Lexer::consume_open_paren()
 | |
| {
 | |
|     auto start = position();
 | |
|     VERIFY(consume_specific('('));
 | |
|     emit_token(Token::Type::OpenParen, "("sv, start, position());
 | |
| }
 | |
| 
 | |
| void Lexer::consume_close_paren()
 | |
| {
 | |
|     auto start = position();
 | |
|     VERIFY(consume_specific(')'));
 | |
|     emit_token(Token::Type::CloseParen, ")"sv, start, position());
 | |
| }
 | |
| 
 | |
| void Lexer::consume_garbage()
 | |
| {
 | |
|     ScopeLogger<CMAKE_DEBUG> log;
 | |
|     auto start = position();
 | |
|     auto contents = consume_until(is_ascii_space);
 | |
|     if (!contents.is_empty())
 | |
|         emit_token(Token::Type::Garbage, contents, start, position());
 | |
| }
 | |
| 
 | |
| // https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#bracket-argument
 | |
| // Used by both bracket arguments and bracket comments.
 | |
| StringView Lexer::read_bracket_argument()
 | |
| {
 | |
|     VERIFY(consume_specific('['));
 | |
|     auto leading_equals_signs = consume_while([](char c) { return c == '='; });
 | |
|     consume_specific('[');
 | |
|     auto start = tell();
 | |
|     auto end = start;
 | |
|     while (!is_eof()) {
 | |
|         // Read everything until we see `]={len}]`.
 | |
|         ignore_until(']');
 | |
|         end = tell();
 | |
|         ignore();
 | |
|         if (next_is(leading_equals_signs))
 | |
|             ignore(leading_equals_signs.length());
 | |
|         if (consume_specific(']'))
 | |
|             break;
 | |
|     }
 | |
| 
 | |
|     return m_input.substring_view(start, end - start);
 | |
| }
 | |
| 
 | |
| // https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#variable-references
 | |
| Vector<VariableReference> Lexer::parse_variable_references_from_argument(StringView argument_value, Position argument_start)
 | |
| {
 | |
|     auto position = argument_start;
 | |
|     GenericLexer lexer { argument_value };
 | |
|     Vector<VariableReference> variable_references;
 | |
| 
 | |
|     while (!lexer.is_eof()) {
 | |
|         if (lexer.next_is('\n')) {
 | |
|             lexer.ignore();
 | |
|             position.column = 0;
 | |
|             position.line++;
 | |
|             continue;
 | |
|         }
 | |
| 
 | |
|         if (lexer.next_is('\\')) {
 | |
|             lexer.ignore();
 | |
|             if (lexer.next_is('\n')) {
 | |
|                 lexer.ignore();
 | |
|                 position.column = 0;
 | |
|                 position.line++;
 | |
|                 continue;
 | |
|             }
 | |
|             lexer.ignore();
 | |
|             position.column += 2;
 | |
|         }
 | |
| 
 | |
|         if (lexer.next_is('$')) {
 | |
|             auto start = position;
 | |
|             lexer.ignore();
 | |
|             position.column++;
 | |
| 
 | |
|             if (lexer.next_is("ENV{"sv)) {
 | |
|                 lexer.ignore(4);
 | |
|                 position.column += 4;
 | |
|             } else if (lexer.next_is('{')) {
 | |
|                 lexer.ignore();
 | |
|                 position.column++;
 | |
|             } else {
 | |
|                 auto skipped = lexer.consume_until(is_any_of("$ \n"sv));
 | |
|                 position.column += skipped.length();
 | |
|                 continue;
 | |
|             }
 | |
| 
 | |
|             auto variable_name = lexer.consume_until(is_any_of("} \n"sv));
 | |
|             position.column += variable_name.length();
 | |
|             if (lexer.next_is('}')) {
 | |
|                 lexer.ignore();
 | |
|                 position.column++;
 | |
|                 variable_references.empend(variable_name, start, position);
 | |
|             }
 | |
| 
 | |
|             continue;
 | |
|         }
 | |
| 
 | |
|         lexer.ignore();
 | |
|         position.column++;
 | |
|     }
 | |
| 
 | |
|     return variable_references;
 | |
| }
 | |
| 
 | |
| Position Lexer::position() const
 | |
| {
 | |
|     return Position {
 | |
|         .line = m_line,
 | |
|         .column = tell() - m_string_offset_after_previous_newline,
 | |
|     };
 | |
| }
 | |
| 
 | |
| void Lexer::next_line()
 | |
| {
 | |
|     VERIFY(consume_specific('\n'));
 | |
|     m_string_offset_after_previous_newline = tell();
 | |
|     m_line++;
 | |
| }
 | |
| 
 | |
| void Lexer::emit_token(Token::Type type, StringView value, Position start, Position end, Optional<ControlKeywordType> control_keyword, Vector<VariableReference> variable_references)
 | |
| {
 | |
|     dbgln_if(CMAKE_DEBUG, "Emitting {} token: `{}` ({}:{} to {}:{})", to_string(type), value, start.line, start.column, end.line, end.column);
 | |
|     m_tokens.empend(type, value, start, end, move(control_keyword), move(variable_references));
 | |
| }
 | |
| 
 | |
| }
 | 
