1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-27 09:27:35 +00:00

LibGUI+HackStudio: Move syntax highlighting from HackStudio to LibGUI

This patch introduces the GUI::SyntaxHighlighter class, which can be
attached to a GUI::TextEditor to provide syntax highlighting.

The C++ syntax highlighting from HackStudio becomes a new class called
GUI::CppSyntaxHighlighter. This will make it possible to get C++ syntax
highlighting in any app that uses a GUI::TextEditor. :^)

Sidenote: It does feel a bit weird having a C++ lexer in a GUI toolkit
library, and we'll probably end up moving this out to a separate place
as this functionality grows larger.
This commit is contained in:
Andreas Kling 2020-02-07 20:07:15 +01:00
parent 6cf49c23d4
commit bb8e65be41
13 changed files with 277 additions and 170 deletions

View file

@ -1,370 +0,0 @@
/*
* Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "CppLexer.h"
#include <AK/HashTable.h>
#include <AK/String.h>
#include <ctype.h>
// Constructs a lexer over `input`. The view is stored in m_input; lexing itself
// happens later, in lex().
// NOTE(review): m_input is a StringView, so presumably the caller must keep the
// underlying text alive for as long as the lexer is used — confirm.
CppLexer::CppLexer(const StringView& input)
: m_input(input)
{
}
// Looks `offset` characters ahead of the current read index without consuming
// anything. Yields 0 when that position is at or beyond the end of the input.
char CppLexer::peek(size_t offset) const
{
    const size_t lookahead_index = m_index + offset;
    const bool in_range = lookahead_index < m_input.length();
    return in_range ? m_input[lookahead_index] : '\0';
}
// Consumes and returns the character at the current read index.
// Before advancing, the current position is saved in m_previous_position, so
// that after the call it refers to the position of the character just consumed.
// A newline moves m_position to column 0 of the next line; any other character
// advances the column by one.
char CppLexer::consume()
{
    ASSERT(m_index < m_input.length());

    m_previous_position = m_position;

    const char consumed = m_input[m_index];
    ++m_index;

    if (consumed != '\n') {
        ++m_position.column;
        return consumed;
    }

    ++m_position.line;
    m_position.column = 0;
    return consumed;
}
// Returns true if `ch` may start an identifier: an alphabetic character, '_' or '$'.
static bool is_valid_first_character_of_identifier(char ch)
{
    // The <ctype.h> classifiers require a value representable as unsigned char (or EOF);
    // passing a negative `char` (e.g. a UTF-8 continuation byte) is undefined behavior.
    return isalpha(static_cast<unsigned char>(ch)) || ch == '_' || ch == '$';
}
// Returns true if `ch` may appear after the first character of an identifier:
// anything accepted as a first character, plus decimal digits.
static bool is_valid_nonfirst_character_of_identifier(char ch)
{
    // Cast before calling isdigit(): a negative `char` is undefined behavior for <ctype.h> functions.
    return is_valid_first_character_of_identifier(ch) || isdigit(static_cast<unsigned char>(ch));
}
// Returns true if `string` is one of the C++ keywords listed below.
// The lookup table is a function-local static that is filled the first time
// the function is called (i.e. while it is still empty).
static bool is_keyword(const StringView& string)
{
    static const char* const keyword_names[] = {
        "alignas",
        "alignof",
        "and",
        "and_eq",
        "asm",
        "bitand",
        "bitor",
        "bool",
        "break",
        "case",
        "catch",
        "class",
        "compl",
        "const",
        "const_cast",
        "constexpr",
        "continue",
        "decltype",
        "default",
        "delete",
        "do",
        "dynamic_cast",
        "else",
        "enum",
        "explicit",
        "export",
        "extern",
        "false",
        "final",
        "for",
        "friend",
        "goto",
        "if",
        "inline",
        "mutable",
        "namespace",
        "new",
        "noexcept",
        "not",
        "not_eq",
        "nullptr",
        "operator",
        "or",
        "or_eq",
        "override",
        "private",
        "protected",
        "public",
        "register",
        "reinterpret_cast",
        "return",
        "signed",
        "sizeof",
        "static",
        "static_assert",
        "static_cast",
        "struct",
        "switch",
        "template",
        "this",
        "thread_local",
        "throw",
        "true",
        "try",
        "typedef",
        "typeid",
        "typename",
        "union",
        "using",
        "virtual",
        "volatile",
        "while",
        "xor",
        "xor_eq",
    };

    static HashTable<String> keywords;
    if (keywords.is_empty()) {
        for (const char* keyword_name : keyword_names)
            keywords.set(keyword_name);
    }
    return keywords.contains(string);
}
// Returns true if `string` names one of the well-known types listed below
// (AK container/utility types plus the built-in arithmetic type names).
// The lookup table is a function-local static that is filled the first time
// the function is called (i.e. while it is still empty).
static bool is_known_type(const StringView& string)
{
    // Each name appears exactly once; the original list inserted "int" twice.
    static const char* const type_names[] = {
        "ByteBuffer",
        "CircularDeque",
        "CircularQueue",
        "Deque",
        "DoublyLinkedList",
        "FileSystemPath",
        "FixedArray",
        "Function",
        "HashMap",
        "HashTable",
        "IPv4Address",
        "InlineLinkedList",
        "IntrusiveList",
        "JsonArray",
        "JsonObject",
        "JsonValue",
        "MappedFile",
        "NetworkOrdered",
        "NonnullOwnPtr",
        "NonnullOwnPtrVector",
        "NonnullRefPtr",
        "NonnullRefPtrVector",
        "Optional",
        "OwnPtr",
        "RefPtr",
        "Result",
        "ScopeGuard",
        "SinglyLinkedList",
        "String",
        "StringBuilder",
        "StringImpl",
        "StringView",
        "Utf8View",
        "Vector",
        "WeakPtr",
        "auto",
        "char",
        "char16_t",
        "char32_t",
        "char8_t",
        "double",
        "float",
        "i16",
        "i32",
        "i64",
        "i8",
        "int",
        "long",
        "short",
        "signed",
        "u16",
        "u32",
        "u64",
        "u8",
        "unsigned",
        "void",
        "wchar_t",
    };

    static HashTable<String> types;
    if (types.is_empty()) {
        for (const char* type_name : type_names)
            types.set(type_name);
    }
    return types.contains(string);
}
// Splits the whole input into a flat list of tokens, starting at the current read index.
//
// Every input character ends up in exactly one token: characters that no rule
// recognizes are emitted one at a time as Type::Unknown. Token positions are
// inclusive: m_start is the position of the first character and m_end the
// position of the last character of the token.
//
// String and character literals honor backslash escapes, so an escaped quote
// (e.g. "a\"b" or '\'') does not terminate the literal. Unterminated literals
// and block comments extend to the end of the input.
Vector<CppToken> CppLexer::lex()
{
    Vector<CppToken> tokens;

    size_t token_start_index = 0;
    CppPosition token_start_position { 0, 0 };

    // The <ctype.h> classifiers have undefined behavior for negative `char`
    // values (any non-ASCII byte), so always go through unsigned char.
    auto is_space = [](char c) { return isspace(static_cast<unsigned char>(c)) != 0; };
    auto is_digit = [](char c) { return isdigit(static_cast<unsigned char>(c)) != 0; };

    // Emits a single-character token at the current position and consumes that character.
    auto emit_token = [&](auto type) {
        CppToken token;
        token.m_type = type;
        token.m_start = m_position;
        token.m_end = m_position;
        tokens.append(token);
        consume();
    };

    // Remembers where a multi-character token starts.
    auto begin_token = [&] {
        token_start_index = m_index;
        token_start_position = m_position;
    };

    // Appends a token spanning from the last begin_token() up to and including
    // the most recently consumed character.
    auto commit_token = [&](auto type) {
        CppToken token;
        token.m_type = type;
        token.m_start = token_start_position;
        token.m_end = m_previous_position;
        tokens.append(token);
    };

    // Lexes a literal delimited by `quote`, starting at the opening quote.
    auto lex_quoted = [&](char quote, auto type) {
        begin_token();
        consume(); // Opening quote.
        while (peek()) {
            char consumed = consume();
            if (consumed == '\\') {
                // Skip the escaped character so that \" or \' does not end the literal.
                if (peek())
                    consume();
                continue;
            }
            if (consumed == quote)
                break;
        }
        commit_token(type);
    };

    while (m_index < m_input.length()) {
        auto ch = peek();
        if (is_space(ch)) {
            begin_token();
            while (is_space(peek()))
                consume();
            commit_token(CppToken::Type::Whitespace);
            continue;
        }
        if (ch == '(') {
            emit_token(CppToken::Type::LeftParen);
            continue;
        }
        if (ch == ')') {
            emit_token(CppToken::Type::RightParen);
            continue;
        }
        if (ch == '{') {
            emit_token(CppToken::Type::LeftCurly);
            continue;
        }
        if (ch == '}') {
            emit_token(CppToken::Type::RightCurly);
            continue;
        }
        if (ch == '[') {
            emit_token(CppToken::Type::LeftBracket);
            continue;
        }
        if (ch == ']') {
            emit_token(CppToken::Type::RightBracket);
            continue;
        }
        if (ch == ',') {
            emit_token(CppToken::Type::Comma);
            continue;
        }
        if (ch == '*') {
            emit_token(CppToken::Type::Asterisk);
            continue;
        }
        if (ch == ';') {
            emit_token(CppToken::Type::Semicolon);
            continue;
        }
        if (ch == '#') {
            // A preprocessor statement runs to the end of the line (newline not included).
            begin_token();
            while (peek() && peek() != '\n')
                consume();
            commit_token(CppToken::Type::PreprocessorStatement);
            continue;
        }
        if (ch == '/' && peek(1) == '/') {
            // Line comment: runs to the end of the line (newline not included).
            begin_token();
            while (peek() && peek() != '\n')
                consume();
            commit_token(CppToken::Type::Comment);
            continue;
        }
        if (ch == '/' && peek(1) == '*') {
            // Block comment: runs through the closing "*/", or to the end of input if unterminated.
            begin_token();
            consume();
            consume();
            bool comment_block_ends = false;
            while (peek()) {
                if (peek() == '*' && peek(1) == '/') {
                    comment_block_ends = true;
                    break;
                }
                consume();
            }
            if (comment_block_ends) {
                consume();
                consume();
            }
            commit_token(CppToken::Type::Comment);
            continue;
        }
        if (ch == '"') {
            lex_quoted('"', CppToken::Type::DoubleQuotedString);
            continue;
        }
        if (ch == '\'') {
            lex_quoted('\'', CppToken::Type::SingleQuotedString);
            continue;
        }
        if (is_digit(ch)) {
            begin_token();
            while (peek() && is_digit(peek())) {
                consume();
            }
            commit_token(CppToken::Type::Number);
            continue;
        }
        if (is_valid_first_character_of_identifier(ch)) {
            begin_token();
            while (peek() && is_valid_nonfirst_character_of_identifier(peek()))
                consume();
            // Classify the identifier text: keywords take precedence over known types.
            auto token_view = StringView(m_input.characters_without_null_termination() + token_start_index, m_index - token_start_index);
            if (is_keyword(token_view))
                commit_token(CppToken::Type::Keyword);
            else if (is_known_type(token_view))
                commit_token(CppToken::Type::KnownType);
            else
                commit_token(CppToken::Type::Identifier);
            continue;
        }
        dbg() << "Unimplemented token character: " << ch;
        emit_token(CppToken::Type::Unknown);
    }
    return tokens;
}

View file

@ -1,96 +0,0 @@
/*
* Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <AK/StringView.h>
#include <AK/Vector.h>
// X-macro listing every token type. Users define __TOKEN(x) to whatever they
// want generated for each entry, expand FOR_EACH_TOKEN_TYPE, and then #undef
// __TOKEN again. Unknown is listed first, so it is the first enumerator of any
// enum generated from this list.
#define FOR_EACH_TOKEN_TYPE \
__TOKEN(Unknown) \
__TOKEN(Whitespace) \
__TOKEN(PreprocessorStatement) \
__TOKEN(LeftParen) \
__TOKEN(RightParen) \
__TOKEN(LeftCurly) \
__TOKEN(RightCurly) \
__TOKEN(LeftBracket) \
__TOKEN(RightBracket) \
__TOKEN(Comma) \
__TOKEN(Asterisk) \
__TOKEN(Semicolon) \
__TOKEN(DoubleQuotedString) \
__TOKEN(SingleQuotedString) \
__TOKEN(Comment) \
__TOKEN(Number) \
__TOKEN(Keyword) \
__TOKEN(KnownType) \
__TOKEN(Identifier)
// A line/column location in the lexed text.
// Members default to 0 so that a default-constructed position (for example the
// m_start/m_end of a freshly created CppToken) never holds indeterminate values.
// The struct remains an aggregate, so `CppPosition { line, column }` still works.
struct CppPosition {
    size_t line { 0 };
    size_t column { 0 };
};
// A single token produced by CppLexer::lex().
struct CppToken {
// One enumerator per entry of FOR_EACH_TOKEN_TYPE, in list order.
enum class Type {
#define __TOKEN(x) x,
FOR_EACH_TOKEN_TYPE
#undef __TOKEN
};
// Returns the name of this token's type (the enumerator spelled as a C string).
// Asserts if m_type is not one of the enumerators generated above.
const char* to_string() const
{
switch (m_type) {
#define __TOKEN(x) \
case Type::x: \
return #x;
FOR_EACH_TOKEN_TYPE
#undef __TOKEN
}
ASSERT_NOT_REACHED();
}
Type m_type { Type::Unknown };
// Position of the token's first character.
CppPosition m_start;
// Position of the token's last character (the lexer stores an inclusive end).
CppPosition m_end;
};
// Tokenizer for C++ source text: construct it over a StringView and call lex()
// to obtain the token list.
class CppLexer {
public:
CppLexer(const StringView&);
// Lexes the input from the current read index to the end and returns the tokens.
Vector<CppToken> lex();
private:
// Returns the character `offset` positions ahead without consuming it, or 0 past the end.
char peek(size_t offset = 0) const;
// Returns the current character and advances m_index and m_position.
char consume();
StringView m_input;
// Index into m_input of the next character to read.
size_t m_index { 0 };
// Value of m_position before the most recent consume().
CppPosition m_previous_position { 0, 0 };
// Line/column of the next character to read.
CppPosition m_position { 0, 0 };
};

View file

@ -25,7 +25,6 @@
*/
#include "Editor.h"
#include "CppLexer.h"
#include "EditorWrapper.h"
#include <AK/FileSystemPath.h>
#include <LibCore/DirIterator.h>
@ -197,98 +196,3 @@ void Editor::mousemove_event(GUI::MouseEvent& event)
}
GUI::Application::the().hide_tooltip();
}
void Editor::highlight_matching_token_pair()
{
enum class Direction {
Forward,
Backward,
};
auto find_span_of_type = [&](int i, CppToken::Type type, CppToken::Type not_type, Direction direction) {
int nesting_level = 0;
bool forward = direction == Direction::Forward;
for (forward ? ++i : --i; forward ? (i < document().spans().size()) : (i >= 0); forward ? ++i : --i) {
auto& span = document().spans().at(i);
auto span_token_type = (CppToken::Type)((uintptr_t)span.data);
if (span_token_type == not_type) {
++nesting_level;
} else if (span_token_type == type) {
if (nesting_level-- <= 0)
return i;
}
}
return -1;
};
auto make_buddies = [&](int index0, int index1) {
auto& buddy0 = const_cast<GUI::TextDocumentSpan&>(document().spans()[index0]);
auto& buddy1 = const_cast<GUI::TextDocumentSpan&>(document().spans()[index1]);
m_has_brace_buddies = true;
m_brace_buddies[0].index = index0;
m_brace_buddies[1].index = index1;
m_brace_buddies[0].span_backup = buddy0;
m_brace_buddies[1].span_backup = buddy1;
buddy0.background_color = Color::DarkCyan;
buddy1.background_color = Color::DarkCyan;
buddy0.color = Color::White;
buddy1.color = Color::White;
update();
};
struct MatchingTokenPair {
CppToken::Type open;
CppToken::Type close;
};
MatchingTokenPair pairs[] = {
{ CppToken::Type::LeftCurly, CppToken::Type::RightCurly },
{ CppToken::Type::LeftParen, CppToken::Type::RightParen },
{ CppToken::Type::LeftBracket, CppToken::Type::RightBracket },
};
for (int i = 0; i < document().spans().size(); ++i) {
auto& span = const_cast<GUI::TextDocumentSpan&>(document().spans().at(i));
auto token_type = (CppToken::Type)((uintptr_t)span.data);
for (auto& pair : pairs) {
if (token_type == pair.open && span.range.start() == cursor()) {
auto buddy = find_span_of_type(i, pair.close, pair.open, Direction::Forward);
if (buddy != -1)
make_buddies(i, buddy);
return;
}
}
auto right_of_end = span.range.end();
right_of_end.set_column(right_of_end.column() + 1);
for (auto& pair : pairs) {
if (token_type == pair.close && right_of_end == cursor()) {
auto buddy = find_span_of_type(i, pair.open, pair.close, Direction::Backward);
if (buddy != -1)
make_buddies(i, buddy);
return;
}
}
}
}
// Called when the cursor moves: restores the spans that were recolored for the
// previous brace match (if any), then looks for a new matching pair.
void Editor::cursor_did_change()
{
    if (m_has_brace_buddies) {
        for (auto& buddy : m_brace_buddies) {
            // Skip backups whose index no longer refers to an existing span.
            if (buddy.index < 0 || buddy.index >= document().spans().size())
                continue;
            document().set_span_at_index(buddy.index, buddy.span_backup);
        }
        m_has_brace_buddies = false;
        update();
    }
    highlight_matching_token_pair();
}
// Called after the document's spans have been regenerated by a rehighlight.
// The saved buddy backups are dropped without being restored (the flag is just
// cleared), and the matching pair is then recomputed on the new spans.
void Editor::notify_did_rehighlight()
{
m_has_brace_buddies = false;
highlight_matching_token_pair();
}

View file

@ -41,29 +41,17 @@ public:
EditorWrapper& wrapper();
const EditorWrapper& wrapper() const;
void notify_did_rehighlight();
private:
virtual void focusin_event(Core::Event&) override;
virtual void focusout_event(Core::Event&) override;
virtual void paint_event(GUI::PaintEvent&) override;
virtual void mousemove_event(GUI::MouseEvent&) override;
virtual void cursor_did_change() override;
void show_documentation_tooltip_if_available(const String&, const Gfx::Point& screen_location);
void highlight_matching_token_pair();
explicit Editor(GUI::Widget* parent);
RefPtr<GUI::Window> m_documentation_tooltip_window;
RefPtr<HtmlView> m_documentation_html_view;
String m_last_parsed_token;
// One half of a highlighted brace pair: the index of the span in the document
// and a copy of that span as it looked before it was recolored.
struct BuddySpan {
// Index into the document's span list; -1 when unset.
int index { -1 };
GUI::TextDocumentSpan span_backup;
};
bool m_has_brace_buddies { false };
BuddySpan m_brace_buddies[2];
};

View file

@ -6,7 +6,6 @@ OBJS = \
ProcessStateWidget.o \
FormEditorWidget.o \
FormWidget.o \
CppLexer.o \
Editor.o \
EditorWrapper.o \
Locator.o \

View file

@ -24,7 +24,6 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "CppLexer.h"
#include "CursorTool.h"
#include "Editor.h"
#include "EditorWrapper.h"
@ -44,6 +43,7 @@
#include <LibGUI/Application.h>
#include <LibGUI/BoxLayout.h>
#include <LibGUI/Button.h>
#include <LibGUI/CppSyntaxHighlighter.h>
#include <LibGUI/FilePicker.h>
#include <LibGUI/InputBox.h>
#include <LibGUI/Label.h>
@ -548,70 +548,13 @@ void run(TerminalWrapper& wrapper)
wrapper.run_command("make run");
}
// Visual attributes applied to a highlighted token.
struct TextStyle {
Color color;
// Font override for the token; stays nullptr when no font is specified.
const Gfx::Font* font { nullptr };
};
// Maps a token type to the color (and optional font) used to render it.
// Token types without a dedicated rule are rendered in black with no font override.
static TextStyle style_for_token_type(CppToken::Type type)
{
    // Start from the fallback style and override per token type.
    TextStyle style { Color::Black };

    switch (type) {
    case CppToken::Type::Keyword:
        style.font = &Gfx::Font::default_bold_fixed_width_font();
        break;
    case CppToken::Type::KnownType:
        style.color = Color::from_rgb(0x929200);
        style.font = &Gfx::Font::default_bold_fixed_width_font();
        break;
    case CppToken::Type::Identifier:
        style.color = Color::from_rgb(0x000092);
        break;
    case CppToken::Type::DoubleQuotedString:
    case CppToken::Type::SingleQuotedString:
    case CppToken::Type::Number:
        style.color = Color::from_rgb(0x920000);
        break;
    case CppToken::Type::PreprocessorStatement:
        style.color = Color::from_rgb(0x009292);
        break;
    case CppToken::Type::Comment:
        style.color = Color::from_rgb(0x009200);
        break;
    default:
        break;
    }

    return style;
}
static void rehighlight()
{
auto text = current_editor().text();
CppLexer lexer(text);
auto tokens = lexer.lex();
Vector<GUI::TextDocumentSpan> spans;
for (auto& token : tokens) {
#ifdef DEBUG_SYNTAX_HIGHLIGHTING
dbg() << token.to_string() << " @ " << token.m_start.line << ":" << token.m_start.column << " - " << token.m_end.line << ":" << token.m_end.column;
#endif
GUI::TextDocumentSpan span;
span.range.set_start({ token.m_start.line, token.m_start.column });
span.range.set_end({ token.m_end.line, token.m_end.column });
auto style = style_for_token_type(token.m_type);
span.color = style.color;
span.font = style.font;
span.is_skippable = token.m_type == CppToken::Type::Whitespace;
span.data = (void*)token.m_type;
spans.append(span);
}
current_editor().document().set_spans(spans);
static_cast<Editor&>(current_editor()).notify_did_rehighlight();
current_editor().update();
}
void open_file(const String& filename)
{
auto file = g_project->get_file(filename);
current_editor().set_document(const_cast<GUI::TextDocument&>(file->document()));
if (filename.ends_with(".cpp") || filename.ends_with(".h")) {
current_editor().on_change = [] { rehighlight(); };
rehighlight();
} else {
current_editor().on_change = nullptr;
}
if (filename.ends_with(".cpp") || filename.ends_with(".h"))
current_editor().set_syntax_highlighter(make<GUI::CppSyntaxHighlighter>());
if (filename.ends_with(".frm")) {
set_edit_mode(EditMode::Form);