1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-27 20:07:34 +00:00

LibMarkdown: Rewrite Inline text parser to be more forgiving

The previous Text::parse was not able to give up on parsing a textual
element, and just leave it as plain text. Because this is a very
important part of markdown, I fully rewrote the parser to support this
without having to backtrack. Also the parser now has some other little
features, such as delimiter runs and flanking.
This commit is contained in:
Peter Elliott 2021-09-06 19:11:46 -06:00 committed by Andreas Kling
parent 80e58dab9a
commit ec9f892899
10 changed files with 462 additions and 397 deletions

View file

@ -1,5 +1,6 @@
/*
* Copyright (c) 2019-2020, Sergey Bugaev <bugaevc@serenityos.org>
* Copyright (c) 2021, Peter Elliott <pelliott@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
@ -7,48 +8,138 @@
#pragma once
#include <AK/Noncopyable.h>
#include <AK/NonnullOwnPtrVector.h>
#include <AK/OwnPtr.h>
#include <AK/String.h>
#include <AK/Vector.h>
namespace Markdown {
class Text final {
AK_MAKE_NONCOPYABLE(Text);
public:
struct Style {
bool emph { false };
bool strong { false };
bool code { false };
String href;
String img;
// Abstract base of the inline-text node hierarchy. Every concrete node
// type must provide an HTML rendering, a terminal rendering, and a
// terminal length.
class Node {
public:
    // Rendering hook for HTML output; the result goes through `builder`.
    virtual void render_to_html(StringBuilder& builder) const = 0;

    // Rendering hook for terminal output; the result goes through `builder`.
    virtual void render_for_terminal(StringBuilder& builder) const = 0;

    // Length reported for this node's terminal rendering.
    virtual size_t terminal_length() const = 0;

    // Virtual, so derived nodes can be destroyed through a Node pointer.
    virtual ~Node() = default;
};
struct Span {
// Node that wraps a single child node together with a flag selecting
// strong versus regular emphasis.
class EmphasisNode : public Node {
public:
    // True for strong emphasis, false for regular emphasis.
    bool strong;
    // The wrapped content; owned by this node.
    NonnullOwnPtr<Node> child;

    // Node interface.
    virtual size_t terminal_length() const override;
    virtual void render_for_terminal(StringBuilder& builder) const override;
    virtual void render_to_html(StringBuilder& builder) const override;

    // Takes ownership of `child`.
    EmphasisNode(bool strong, NonnullOwnPtr<Node> child)
        : strong(strong), child(move(child))
    {
    }
};
// Node that wraps a single owned child node holding the code content.
class CodeNode : public Node {
public:
    // The wrapped code content; owned by this node.
    NonnullOwnPtr<Node> code;

    // Node interface.
    virtual size_t terminal_length() const override;
    virtual void render_for_terminal(StringBuilder& builder) const override;
    virtual void render_to_html(StringBuilder& builder) const override;

    // Takes ownership of `code`.
    CodeNode(NonnullOwnPtr<Node> code)
        : code(move(code))
    {
    }
};
// Leaf node that stores a piece of text as an owned String.
class TextNode : public Node {
public:
// The stored text; initialized from the StringView passed to the constructor.
String text;
// NOTE(review): `Style` is the struct declared near the top of this listing.
// The constructor below does not initialize this member explicitly, and it
// looks like a leftover of the pre-rewrite `Span` struct - confirm whether
// this line is still part of the class.
Style style;
// Initializes `text` from the given view.
TextNode(StringView const& text)
: text(text)
{
}
// Node interface.
virtual void render_to_html(StringBuilder& builder) const override;
virtual void render_for_terminal(StringBuilder& builder) const override;
virtual size_t terminal_length() const override;
};
// Constructors of the outer Text class.
// NOTE(review): the String&& constructor is only declared here; it may be a
// leftover of the pre-rewrite interface - confirm that a definition exists.
explicit Text(String&& text);
// Move construction and default construction are compiler-generated.
Text(Text&& text) = default;
Text() = default;
// Node with two owned children, `text` and `href`, plus a flag that
// distinguishes images from ordinary links.
class LinkNode : public Node {
public:
// True when this node is an image rather than a link.
bool is_image;
// Child node for the text part.
NonnullOwnPtr<Node> text;
// Child node for the target part.
NonnullOwnPtr<Node> href;
// NOTE(review): this is a move-assignment operator for the outer `Text`
// class, not for LinkNode; it looks misplaced here (presumably a context
// line of the diff this listing was taken from) - confirm.
Text& operator=(Text&&) = default;
// Takes ownership of both child nodes.
LinkNode(bool is_image, NonnullOwnPtr<Node> text, NonnullOwnPtr<Node> href)
: is_image(is_image)
, text(move(text))
, href(move(href))
{
}
// NOTE(review): returns `m_spans`, which is not a member of LinkNode; this
// accessor appears to belong to the pre-rewrite `Text` interface - confirm.
const Vector<Span>& spans() const { return m_spans; }
// Node interface.
virtual void render_to_html(StringBuilder& builder) const override;
virtual void render_for_terminal(StringBuilder& builder) const override;
virtual size_t terminal_length() const override;
};
// Node that owns an ordered collection of child nodes.
class MultiNode : public Node {
public:
    // The owned children, in order.
    NonnullOwnPtrVector<Node> children;

    // Node interface.
    virtual size_t terminal_length() const override;
    virtual void render_for_terminal(StringBuilder& builder) const override;
    virtual void render_to_html(StringBuilder& builder) const override;
};
// Public rendering interface of Text (declarations only; the definitions
// are not part of this listing).
size_t terminal_length() const;
String render_to_html() const;
String render_for_terminal() const;
// NOTE(review): the two `parse` declarations below differ only in their
// return type and cannot coexist as overloads. The Optional<Text> form is
// presumably the pre-rewrite signature and the plain Text form the new one -
// confirm which one is live.
static Optional<Text> parse(const StringView&);
static Text parse(StringView const&);
private:
// Private constructor that takes ownership of a vector of spans.
// NOTE(review): this initializes `m_spans`, which looks like a member of the
// pre-rewrite design; confirm whether this constructor is still in use.
Text(Vector<Span>&& spans)
: m_spans(move(spans))
{
}
// One lexical unit of inline text; tokenize() returns a Vector of these.
struct Token {
// The token's characters.
String data;
// Flanking basically means that a delimiter run has a non-whitespace,
// non-punctuation character on the corresponding side. For a more exact
// definition, see the CommonMark spec.
bool left_flanking;
bool right_flanking;
// is_run indicates that this token is a 'delimiter run'. A delimiter
// run occurs when several of the same syntactical character ('`', '_',
// or '*') occur in a row.
bool is_run;
// NOTE(review): `Vector<Span> m_spans` looks like a member of the outer
// Text class from before the rewrite rather than a field of Token - confirm.
Vector<Span> m_spans;
// Returns the first character of `data`. Must only be called on a
// delimiter run (enforced with VERIFY).
char run_char() const
{
VERIFY(is_run);
return data[0];
}
// Returns the length of `data`. Must only be called on a delimiter run
// (enforced with VERIFY).
// NOTE(review): the return type is `char`, so a length that does not fit
// in a char is narrowed on return - confirm that such long runs cannot
// occur, or widen the return type.
char run_length() const
{
VERIFY(is_run);
return data.length();
}
// Compares the token's characters against `str`.
bool operator==(StringView const& str) const { return str == data; }
};
// Private parsing helpers (declarations only; the definitions are not part
// of this listing).
// Produces the token vector for a piece of input text.
static Vector<Token> tokenize(StringView const&);
// Predicates on tokens; presumably they decide whether a delimiter token may
// start a construct and whether `closing` may end the one begun by
// `opening` - confirm against the implementation.
static bool can_open(Token const& opening);
static bool can_close_for(Token const& opening, Token const& closing);
// Each parse_* helper receives the token iterator by non-const reference, so
// it can advance the caller's position - presumably past the tokens it
// consumes; confirm against the implementation.
static NonnullOwnPtr<MultiNode> parse_sequence(Vector<Token>::ConstIterator& tokens, bool in_link);
static NonnullOwnPtr<Node> parse_emph(Vector<Token>::ConstIterator& tokens, bool in_link);
static NonnullOwnPtr<Node> parse_code(Vector<Token>::ConstIterator& tokens);
static NonnullOwnPtr<Node> parse_link(Vector<Token>::ConstIterator& tokens);
// Owning, nullable pointer to a node - presumably the root of the parsed
// tree; confirm.
OwnPtr<Node> m_node;
};
}