mirror of
https://github.com/RGBCube/serenity
synced 2025-05-14 06:14:58 +00:00
LibCMake: Introduce a CMake lexer
This commit is contained in:
parent
ca4cc03269
commit
3d8cc2257f
8 changed files with 581 additions and 0 deletions
|
@ -34,6 +34,10 @@
|
|||
# cmakedefine01 CANVAS_RENDERING_CONTEXT_2D_DEBUG
|
||||
#endif
|
||||
|
||||
#ifndef CMAKE_DEBUG
|
||||
# cmakedefine01 CMAKE_DEBUG
|
||||
#endif
|
||||
|
||||
#ifndef COMPOSE_DEBUG
|
||||
# cmakedefine01 COMPOSE_DEBUG
|
||||
#endif
|
||||
|
|
|
@ -13,6 +13,7 @@ set(BXVGA_DEBUG ON)
|
|||
set(CACHE_DEBUG ON)
|
||||
set(CALLBACK_MACHINE_DEBUG ON)
|
||||
set(CANVAS_RENDERING_CONTEXT_2D_DEBUG ON)
|
||||
set(CMAKE_DEBUG ON)
|
||||
set(COMMIT_DEBUG ON)
|
||||
set(COMPOSE_DEBUG ON)
|
||||
set(CONTEXT_SWITCH_DEBUG ON)
|
||||
|
|
|
@ -3,6 +3,7 @@ add_subdirectory(LibAudio)
|
|||
add_subdirectory(LibC)
|
||||
add_subdirectory(LibCards)
|
||||
add_subdirectory(LibChess)
|
||||
add_subdirectory(LibCMake)
|
||||
add_subdirectory(LibCodeComprehension)
|
||||
add_subdirectory(LibCompress)
|
||||
add_subdirectory(LibConfig)
|
||||
|
|
7
Userland/Libraries/LibCMake/CMakeLists.txt
Normal file
7
Userland/Libraries/LibCMake/CMakeLists.txt
Normal file
|
@ -0,0 +1,7 @@
|
|||
# Translation units that make up LibCMake.
set(SOURCES
    Lexer.cpp
    Token.cpp
)

# Build the library target `LibCMake`; `cmake` is the short name passed to serenity_lib().
serenity_lib(LibCMake cmake)
target_link_libraries(LibCMake PRIVATE LibSyntax)
|
371
Userland/Libraries/LibCMake/Lexer.cpp
Normal file
371
Userland/Libraries/LibCMake/Lexer.cpp
Normal file
|
@ -0,0 +1,371 @@
|
|||
/*
|
||||
* Copyright (c) 2023, Sam Atkins <atkinssj@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include "Lexer.h"
|
||||
#include <AK/CharacterTypes.h>
|
||||
#include <AK/Debug.h>
|
||||
#include <AK/Format.h>
|
||||
#include <AK/ScopeLogger.h>
|
||||
|
||||
namespace CMake {
|
||||
|
||||
static bool is_valid_identifier_initial_char(char c)
|
||||
{
|
||||
return is_ascii_alpha(c) || c == '_';
|
||||
}
|
||||
|
||||
static bool is_valid_identifier_char(char c)
|
||||
{
|
||||
return is_ascii_alphanumeric(c) || c == '_';
|
||||
}
|
||||
|
||||
// Public entry point: lexes `input` with a temporary Lexer and hands back its tokens.
ErrorOr<Vector<Token>> Lexer::lex(StringView input)
{
    return Lexer { input }.lex_file();
}
|
||||
|
||||
// Hands `input` to the GenericLexer base; all cursor state lives there.
Lexer::Lexer(StringView input)
    : GenericLexer(input)
{
}
|
||||
|
||||
// Lexes the entire input and returns the tokens that were emitted.
//
// At the top level, each run of non-whitespace is either a command invocation
// (when it starts with an identifier character) or garbage.
ErrorOr<Vector<Token>> Lexer::lex_file()
{
    m_tokens.clear_with_capacity();

    while (!is_eof()) {
        consume_whitespace_or_comments();

        if (is_eof())
            break;

        auto const offset_before = tell();

        if (is_valid_identifier_initial_char(peek()))
            consume_command_invocation();
        else
            consume_garbage();

        // consume_garbage() stops at any ASCII whitespace, including characters such as '\r'
        // that skip_whitespace() may not have consumed. Without this guard such a character
        // would never be consumed and this loop would spin forever.
        if (tell() == offset_before)
            ignore();
    }

    // Hand the tokens over instead of copying the whole vector.
    return move(m_tokens);
}
|
||||
|
||||
void Lexer::skip_whitespace()
|
||||
{
|
||||
while (!is_eof()) {
|
||||
if (next_is('\n')) {
|
||||
next_line();
|
||||
continue;
|
||||
}
|
||||
auto consumed = consume_while([&](char c) {
|
||||
return c == ' ' || c == '\t';
|
||||
});
|
||||
if (consumed.is_empty())
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void Lexer::consume_whitespace_or_comments()
|
||||
{
|
||||
ScopeLogger<CMAKE_DEBUG> log;
|
||||
while (!is_eof()) {
|
||||
skip_whitespace();
|
||||
|
||||
if (next_is('#')) {
|
||||
consume_comment();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#command-invocations
|
||||
void Lexer::consume_command_invocation()
|
||||
{
|
||||
ScopeLogger<CMAKE_DEBUG> log;
|
||||
auto identifier_start = position();
|
||||
auto identifier = consume_while(is_valid_identifier_char);
|
||||
auto control_keyword = control_keyword_from_string(identifier);
|
||||
if (control_keyword.has_value()) {
|
||||
emit_token(Token::Type::ControlKeyword, identifier, identifier_start, position(), control_keyword.release_value());
|
||||
} else {
|
||||
emit_token(Token::Type::Identifier, identifier, identifier_start, position());
|
||||
}
|
||||
|
||||
consume_whitespace_or_comments();
|
||||
|
||||
if (next_is('('))
|
||||
consume_open_paren();
|
||||
|
||||
consume_arguments();
|
||||
|
||||
if (next_is(')'))
|
||||
consume_close_paren();
|
||||
}
|
||||
|
||||
void Lexer::consume_arguments()
|
||||
{
|
||||
ScopeLogger<CMAKE_DEBUG> log;
|
||||
while (!is_eof()) {
|
||||
consume_whitespace_or_comments();
|
||||
|
||||
if (next_is('(')) {
|
||||
consume_open_paren();
|
||||
|
||||
consume_whitespace_or_comments();
|
||||
consume_arguments();
|
||||
consume_whitespace_or_comments();
|
||||
|
||||
if (next_is(')'))
|
||||
consume_close_paren();
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
if (next_is(')'))
|
||||
return;
|
||||
|
||||
consume_argument();
|
||||
}
|
||||
}
|
||||
|
||||
// https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#command-arguments
|
||||
void Lexer::consume_argument()
|
||||
{
|
||||
ScopeLogger<CMAKE_DEBUG> log;
|
||||
consume_whitespace_or_comments();
|
||||
|
||||
if (next_is('[')) {
|
||||
consume_bracket_argument();
|
||||
return;
|
||||
}
|
||||
|
||||
if (next_is('"')) {
|
||||
consume_quoted_argument();
|
||||
return;
|
||||
}
|
||||
|
||||
consume_unquoted_argument();
|
||||
}
|
||||
|
||||
// https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#bracket-argument
|
||||
void Lexer::consume_bracket_argument()
|
||||
{
|
||||
ScopeLogger<CMAKE_DEBUG> log;
|
||||
auto start = position();
|
||||
auto value = read_bracket_argument();
|
||||
emit_token(Token::Type::BracketArgument, value, start, position());
|
||||
}
|
||||
|
||||
// https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#quoted-argument
|
||||
void Lexer::consume_quoted_argument()
|
||||
{
|
||||
ScopeLogger<CMAKE_DEBUG> log;
|
||||
auto start = position();
|
||||
auto start_offset = tell();
|
||||
|
||||
VERIFY(consume_specific('"'));
|
||||
while (!is_eof()) {
|
||||
if (next_is('"')) {
|
||||
ignore();
|
||||
break;
|
||||
}
|
||||
|
||||
if (next_is("\\\""sv)) {
|
||||
ignore(2);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (next_is('\n')) {
|
||||
next_line();
|
||||
continue;
|
||||
}
|
||||
|
||||
ignore();
|
||||
}
|
||||
|
||||
auto whole_token = m_input.substring_view(start_offset, tell() - start_offset);
|
||||
auto value = whole_token.substring_view(1, whole_token.length() - 2);
|
||||
auto variable_references = parse_variable_references_from_argument(whole_token, start);
|
||||
emit_token(Token::Type::QuotedArgument, value, start, position(), {}, move(variable_references));
|
||||
}
|
||||
|
||||
// https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#unquoted-argument
|
||||
void Lexer::consume_unquoted_argument()
|
||||
{
|
||||
ScopeLogger<CMAKE_DEBUG> log;
|
||||
auto start_offset = tell();
|
||||
auto start = position();
|
||||
|
||||
while (!is_eof()) {
|
||||
if (next_is('\\')) {
|
||||
consume_escaped_character('\\');
|
||||
continue;
|
||||
}
|
||||
|
||||
auto consumed = consume_until([](char c) { return is_ascii_space(c) || "()#\"\\'"sv.contains(c); });
|
||||
if (consumed.is_empty())
|
||||
break;
|
||||
|
||||
// FIXME: `unquoted_legacy`
|
||||
}
|
||||
|
||||
auto value = m_input.substring_view(start_offset, tell() - start_offset);
|
||||
auto variable_references = parse_variable_references_from_argument(value, start);
|
||||
emit_token(Token::Type::UnquotedArgument, value, start, position(), {}, move(variable_references));
|
||||
}
|
||||
|
||||
// https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#comments
|
||||
void Lexer::consume_comment()
|
||||
{
|
||||
ScopeLogger<CMAKE_DEBUG> log;
|
||||
auto start = position();
|
||||
|
||||
VERIFY(consume_specific('#'));
|
||||
if (next_is('[')) {
|
||||
// Bracket comment
|
||||
// https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#bracket-comment
|
||||
auto comment = read_bracket_argument();
|
||||
emit_token(Token::Type::BracketComment, comment, start, position());
|
||||
return;
|
||||
}
|
||||
|
||||
// Line comment
|
||||
// https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#line-comment
|
||||
auto comment = consume_until('\n');
|
||||
emit_token(Token::Type::LineComment, comment, start, position());
|
||||
}
|
||||
|
||||
void Lexer::consume_open_paren()
|
||||
{
|
||||
auto start = position();
|
||||
VERIFY(consume_specific('('));
|
||||
emit_token(Token::Type::OpenParen, "("sv, start, position());
|
||||
}
|
||||
|
||||
void Lexer::consume_close_paren()
|
||||
{
|
||||
auto start = position();
|
||||
VERIFY(consume_specific(')'));
|
||||
emit_token(Token::Type::CloseParen, ")"sv, start, position());
|
||||
}
|
||||
|
||||
void Lexer::consume_garbage()
|
||||
{
|
||||
ScopeLogger<CMAKE_DEBUG> log;
|
||||
auto start = position();
|
||||
auto contents = consume_until(is_ascii_space);
|
||||
if (!contents.is_empty())
|
||||
emit_token(Token::Type::Garbage, contents, start, position());
|
||||
}
|
||||
|
||||
// https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#bracket-argument
|
||||
// Used by both bracket arguments and bracket comments.
|
||||
StringView Lexer::read_bracket_argument()
|
||||
{
|
||||
VERIFY(consume_specific('['));
|
||||
auto leading_equals_signs = consume_while([](char c) { return c == '='; });
|
||||
consume_specific('[');
|
||||
auto start = tell();
|
||||
auto end = start;
|
||||
while (!is_eof()) {
|
||||
// Read everything until we see `]={len}]`.
|
||||
ignore_until(']');
|
||||
end = tell();
|
||||
ignore();
|
||||
if (next_is(leading_equals_signs))
|
||||
ignore(leading_equals_signs.length());
|
||||
if (consume_specific(']'))
|
||||
break;
|
||||
}
|
||||
|
||||
return m_input.substring_view(start, end - start);
|
||||
}
|
||||
|
||||
// https://cmake.org/cmake/help/latest/manual/cmake-language.7.html#variable-references
|
||||
Vector<VariableReference> Lexer::parse_variable_references_from_argument(StringView argument_value, Position argument_start)
|
||||
{
|
||||
auto position = argument_start;
|
||||
GenericLexer lexer { argument_value };
|
||||
Vector<VariableReference> variable_references;
|
||||
|
||||
while (!lexer.is_eof()) {
|
||||
if (lexer.next_is('\n')) {
|
||||
lexer.ignore();
|
||||
position.column = 0;
|
||||
position.line++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (lexer.next_is('\\')) {
|
||||
lexer.ignore();
|
||||
if (lexer.next_is('\n')) {
|
||||
lexer.ignore();
|
||||
position.column = 0;
|
||||
position.line++;
|
||||
continue;
|
||||
}
|
||||
lexer.ignore();
|
||||
position.column += 2;
|
||||
}
|
||||
|
||||
if (lexer.next_is('$')) {
|
||||
auto start = position;
|
||||
lexer.ignore();
|
||||
position.column++;
|
||||
|
||||
if (lexer.next_is("ENV{"sv)) {
|
||||
lexer.ignore(4);
|
||||
position.column += 4;
|
||||
} else if (lexer.next_is('{')) {
|
||||
lexer.ignore();
|
||||
position.column++;
|
||||
} else {
|
||||
auto skipped = lexer.consume_until(is_any_of("$ \n"sv));
|
||||
position.column += skipped.length();
|
||||
continue;
|
||||
}
|
||||
|
||||
auto variable_name = lexer.consume_until(is_any_of("} \n"sv));
|
||||
position.column += variable_name.length();
|
||||
if (lexer.next_is('}')) {
|
||||
lexer.ignore();
|
||||
position.column++;
|
||||
variable_references.empend(variable_name, start, position);
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
lexer.ignore();
|
||||
position.column++;
|
||||
}
|
||||
|
||||
return variable_references;
|
||||
}
|
||||
|
||||
// Returns the cursor's current line and its column, measured from the character after the
// most recent newline seen by next_line().
Position Lexer::position() const
{
    auto const column = tell() - m_string_offset_after_previous_newline;
    return Position { .line = m_line, .column = column };
}
|
||||
|
||||
void Lexer::next_line()
|
||||
{
|
||||
VERIFY(consume_specific('\n'));
|
||||
m_string_offset_after_previous_newline = tell();
|
||||
m_line++;
|
||||
}
|
||||
|
||||
// Appends a token to the output list, logging it first when CMAKE_DEBUG is enabled.
void Lexer::emit_token(Token::Type type, StringView value, Position start, Position end, Optional<ControlKeywordType> control_keyword, Vector<VariableReference> variable_references)
{
    dbgln_if(CMAKE_DEBUG, "Emitting {} token: `{}` ({}:{} to {}:{})",
        to_string(type), value,
        start.line, start.column,
        end.line, end.column);

    m_tokens.append(Token { type, value, start, end, move(control_keyword), move(variable_references) });
}
|
||||
|
||||
}
|
51
Userland/Libraries/LibCMake/Lexer.h
Normal file
51
Userland/Libraries/LibCMake/Lexer.h
Normal file
|
@ -0,0 +1,51 @@
|
|||
/*
|
||||
* Copyright (c) 2023, Sam Atkins <atkinssj@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/GenericLexer.h>
|
||||
#include <AK/Vector.h>
|
||||
#include <LibCMake/Token.h>
|
||||
|
||||
namespace CMake {
|
||||
|
||||
class Lexer : private GenericLexer {
|
||||
public:
|
||||
static ErrorOr<Vector<Token>> lex(StringView input);
|
||||
|
||||
private:
|
||||
Lexer(StringView input);
|
||||
|
||||
ErrorOr<Vector<Token>> lex_file();
|
||||
|
||||
void skip_whitespace();
|
||||
|
||||
void consume_whitespace_or_comments();
|
||||
void consume_command_invocation();
|
||||
void consume_arguments();
|
||||
void consume_argument();
|
||||
void consume_bracket_argument();
|
||||
void consume_quoted_argument();
|
||||
void consume_unquoted_argument();
|
||||
void consume_comment();
|
||||
void consume_open_paren();
|
||||
void consume_close_paren();
|
||||
void consume_garbage();
|
||||
|
||||
StringView read_bracket_argument();
|
||||
static Vector<VariableReference> parse_variable_references_from_argument(StringView argument_value, Position argument_start);
|
||||
|
||||
Position position() const;
|
||||
void next_line();
|
||||
|
||||
void emit_token(Token::Type, StringView value, Position start, Position end, Optional<ControlKeywordType> = {}, Vector<VariableReference> = {});
|
||||
|
||||
Vector<Token> m_tokens;
|
||||
size_t m_line { 0 };
|
||||
size_t m_string_offset_after_previous_newline { 0 };
|
||||
};
|
||||
|
||||
}
|
44
Userland/Libraries/LibCMake/Token.cpp
Normal file
44
Userland/Libraries/LibCMake/Token.cpp
Normal file
|
@ -0,0 +1,44 @@
|
|||
/*
|
||||
* Copyright (c) 2023, Sam Atkins <atkinssj@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include "Token.h"
|
||||
|
||||
namespace CMake {
|
||||
|
||||
// Maps a command name to the control keyword it denotes, comparing case-insensitively.
// Returns an empty Optional if `value` is not one of the control-flow commands.
Optional<ControlKeywordType> control_keyword_from_string(StringView value)
{
    struct KeywordMapping {
        StringView spelling;
        ControlKeywordType keyword;
    };

    static constexpr KeywordMapping mappings[] = {
        { "if"sv, ControlKeywordType::If },
        { "elseif"sv, ControlKeywordType::ElseIf },
        { "else"sv, ControlKeywordType::Else },
        { "endif"sv, ControlKeywordType::EndIf },
        { "foreach"sv, ControlKeywordType::ForEach },
        { "endforeach"sv, ControlKeywordType::EndForEach },
        { "while"sv, ControlKeywordType::While },
        { "endwhile"sv, ControlKeywordType::EndWhile },
        { "break"sv, ControlKeywordType::Break },
        { "continue"sv, ControlKeywordType::Continue },
        { "macro"sv, ControlKeywordType::Macro },
        { "endmacro"sv, ControlKeywordType::EndMacro },
        { "function"sv, ControlKeywordType::Function },
        { "endfunction"sv, ControlKeywordType::EndFunction },
    };

    for (auto const& mapping : mappings) {
        if (value.equals_ignoring_case(mapping.spelling))
            return mapping.keyword;
    }

    return {};
}
|
||||
|
||||
}
|
102
Userland/Libraries/LibCMake/Token.h
Normal file
102
Userland/Libraries/LibCMake/Token.h
Normal file
|
@ -0,0 +1,102 @@
|
|||
/*
|
||||
* Copyright (c) 2023, Sam Atkins <atkinssj@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/StringView.h>
|
||||
#include <AK/Vector.h>
|
||||
|
||||
namespace CMake {
|
||||
|
||||
struct Position {
|
||||
size_t line { 0 };
|
||||
size_t column { 0 };
|
||||
};
|
||||
|
||||
struct VariableReference {
|
||||
StringView value;
|
||||
Position start;
|
||||
Position end;
|
||||
};
|
||||
|
||||
// The control-flow commands that control_keyword_from_string() recognizes.
enum class ControlKeywordType {
    // Conditionals
    If,
    ElseIf,
    Else,
    EndIf,

    // Loops
    ForEach,
    EndForEach,
    While,
    EndWhile,
    Break,
    Continue,

    // Definitions
    Macro,
    EndMacro,
    Function,
    EndFunction,
};
|
||||
|
||||
struct Token {
|
||||
enum class Type {
|
||||
BracketComment,
|
||||
LineComment,
|
||||
Identifier,
|
||||
ControlKeyword,
|
||||
OpenParen,
|
||||
CloseParen,
|
||||
BracketArgument,
|
||||
QuotedArgument,
|
||||
UnquotedArgument,
|
||||
Garbage,
|
||||
|
||||
// These are elements inside argument tokens
|
||||
VariableReference,
|
||||
};
|
||||
|
||||
Type type;
|
||||
StringView value;
|
||||
|
||||
Position start;
|
||||
Position end;
|
||||
|
||||
// Type-specific
|
||||
Optional<ControlKeywordType> control_keyword {};
|
||||
Vector<VariableReference> variable_references {};
|
||||
};
|
||||
|
||||
// Returns the name of a token type, spelled exactly like its enumerator.
static constexpr StringView to_string(Token::Type type)
{
    using Type = Token::Type;

    switch (type) {
    case Type::BracketComment:
        return "BracketComment"sv;
    case Type::LineComment:
        return "LineComment"sv;
    case Type::Identifier:
        return "Identifier"sv;
    case Type::ControlKeyword:
        return "ControlKeyword"sv;
    case Type::OpenParen:
        return "OpenParen"sv;
    case Type::CloseParen:
        return "CloseParen"sv;
    case Type::BracketArgument:
        return "BracketArgument"sv;
    case Type::QuotedArgument:
        return "QuotedArgument"sv;
    case Type::UnquotedArgument:
        return "UnquotedArgument"sv;
    case Type::Garbage:
        return "Garbage"sv;
    case Type::VariableReference:
        return "VariableReference"sv;
    }

    // Every enumerator is handled above; reaching this point means `type` held an invalid value.
    VERIFY_NOT_REACHED();
}
|
||||
|
||||
Optional<ControlKeywordType> control_keyword_from_string(StringView value);
|
||||
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue