mirror of
https://github.com/RGBCube/serenity
synced 2025-07-27 11:27:35 +00:00
LibMarkdown: Rewrite Inline text parser to be more forgiving
The previous Text::parse was not able to give up on parsing a textual element, and just leave it as plain text. Because this is a very important part of markdown, I fully rewrote the parser to support this without having to backtrack. Also, the parser now supports some other little features, such as delimiter runs and flanking.
This commit is contained in:
parent
80e58dab9a
commit
ec9f892899
10 changed files with 462 additions and 397 deletions
|
@ -1,5 +1,6 @@
|
|||
/*
|
||||
* Copyright (c) 2019-2020, Sergey Bugaev <bugaevc@serenityos.org>
|
||||
* Copyright (c) 2021, Peter Elliott <pelliott@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
@ -7,48 +8,138 @@
|
|||
#pragma once
|
||||
|
||||
#include <AK/Noncopyable.h>
|
||||
#include <AK/NonnullOwnPtrVector.h>
|
||||
#include <AK/OwnPtr.h>
|
||||
#include <AK/String.h>
|
||||
#include <AK/Vector.h>
|
||||
|
||||
namespace Markdown {
|
||||
|
||||
class Text final {
|
||||
AK_MAKE_NONCOPYABLE(Text);
|
||||
|
||||
public:
|
||||
struct Style {
|
||||
bool emph { false };
|
||||
bool strong { false };
|
||||
bool code { false };
|
||||
String href;
|
||||
String img;
|
||||
class Node {
|
||||
public:
|
||||
virtual void render_to_html(StringBuilder& builder) const = 0;
|
||||
virtual void render_for_terminal(StringBuilder& builder) const = 0;
|
||||
virtual size_t terminal_length() const = 0;
|
||||
|
||||
virtual ~Node() { }
|
||||
};
|
||||
|
||||
struct Span {
|
||||
class EmphasisNode : public Node {
|
||||
public:
|
||||
bool strong;
|
||||
NonnullOwnPtr<Node> child;
|
||||
|
||||
EmphasisNode(bool strong, NonnullOwnPtr<Node> child)
|
||||
: strong(strong)
|
||||
, child(move(child))
|
||||
{
|
||||
}
|
||||
|
||||
virtual void render_to_html(StringBuilder& builder) const override;
|
||||
virtual void render_for_terminal(StringBuilder& builder) const override;
|
||||
virtual size_t terminal_length() const override;
|
||||
};
|
||||
|
||||
class CodeNode : public Node {
|
||||
public:
|
||||
NonnullOwnPtr<Node> code;
|
||||
|
||||
CodeNode(NonnullOwnPtr<Node> code)
|
||||
: code(move(code))
|
||||
{
|
||||
}
|
||||
|
||||
virtual void render_to_html(StringBuilder& builder) const override;
|
||||
virtual void render_for_terminal(StringBuilder& builder) const override;
|
||||
virtual size_t terminal_length() const override;
|
||||
};
|
||||
|
||||
class TextNode : public Node {
|
||||
public:
|
||||
String text;
|
||||
Style style;
|
||||
|
||||
TextNode(StringView const& text)
|
||||
: text(text)
|
||||
{
|
||||
}
|
||||
|
||||
virtual void render_to_html(StringBuilder& builder) const override;
|
||||
virtual void render_for_terminal(StringBuilder& builder) const override;
|
||||
virtual size_t terminal_length() const override;
|
||||
};
|
||||
|
||||
explicit Text(String&& text);
|
||||
Text(Text&& text) = default;
|
||||
Text() = default;
|
||||
class LinkNode : public Node {
|
||||
public:
|
||||
bool is_image;
|
||||
NonnullOwnPtr<Node> text;
|
||||
NonnullOwnPtr<Node> href;
|
||||
|
||||
Text& operator=(Text&&) = default;
|
||||
LinkNode(bool is_image, NonnullOwnPtr<Node> text, NonnullOwnPtr<Node> href)
|
||||
: is_image(is_image)
|
||||
, text(move(text))
|
||||
, href(move(href))
|
||||
{
|
||||
}
|
||||
|
||||
const Vector<Span>& spans() const { return m_spans; }
|
||||
virtual void render_to_html(StringBuilder& builder) const override;
|
||||
virtual void render_for_terminal(StringBuilder& builder) const override;
|
||||
virtual size_t terminal_length() const override;
|
||||
};
|
||||
|
||||
class MultiNode : public Node {
|
||||
public:
|
||||
NonnullOwnPtrVector<Node> children;
|
||||
|
||||
virtual void render_to_html(StringBuilder& builder) const override;
|
||||
virtual void render_for_terminal(StringBuilder& builder) const override;
|
||||
virtual size_t terminal_length() const override;
|
||||
};
|
||||
|
||||
size_t terminal_length() const;
|
||||
|
||||
String render_to_html() const;
|
||||
String render_for_terminal() const;
|
||||
|
||||
static Optional<Text> parse(const StringView&);
|
||||
static Text parse(StringView const&);
|
||||
|
||||
private:
|
||||
Text(Vector<Span>&& spans)
|
||||
: m_spans(move(spans))
|
||||
{
|
||||
}
|
||||
struct Token {
|
||||
String data;
|
||||
// Flanking basically means that a delimiter run has a non-whitespace,
|
||||
// non-punctuation character on the corresponding side. For a more exact
|
||||
// definition, see the CommonMark spec.
|
||||
bool left_flanking;
|
||||
bool right_flanking;
|
||||
// is_run indicates that this token is a 'delimiter run'. A delimiter
|
||||
// run occurs when several of the same syntactical character ('`', '_',
|
||||
// or '*') occur in a row.
|
||||
bool is_run;
|
||||
|
||||
Vector<Span> m_spans;
|
||||
char run_char() const
|
||||
{
|
||||
VERIFY(is_run);
|
||||
return data[0];
|
||||
}
|
||||
char run_length() const
|
||||
{
|
||||
VERIFY(is_run);
|
||||
return data.length();
|
||||
}
|
||||
bool operator==(StringView const& str) const { return str == data; }
|
||||
};
|
||||
|
||||
static Vector<Token> tokenize(StringView const&);
|
||||
|
||||
static bool can_open(Token const& opening);
|
||||
static bool can_close_for(Token const& opening, Token const& closing);
|
||||
|
||||
static NonnullOwnPtr<MultiNode> parse_sequence(Vector<Token>::ConstIterator& tokens, bool in_link);
|
||||
static NonnullOwnPtr<Node> parse_emph(Vector<Token>::ConstIterator& tokens, bool in_link);
|
||||
static NonnullOwnPtr<Node> parse_code(Vector<Token>::ConstIterator& tokens);
|
||||
static NonnullOwnPtr<Node> parse_link(Vector<Token>::ConstIterator& tokens);
|
||||
|
||||
OwnPtr<Node> m_node;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue