diff --git a/Userland/Libraries/LibMarkdown/CodeBlock.cpp b/Userland/Libraries/LibMarkdown/CodeBlock.cpp index 3f8d80a7f4..4b7ccba52e 100644 --- a/Userland/Libraries/LibMarkdown/CodeBlock.cpp +++ b/Userland/Libraries/LibMarkdown/CodeBlock.cpp @@ -10,51 +10,24 @@ namespace Markdown { -Text::Style CodeBlock::style() const -{ - if (m_style_spec.spans().is_empty()) - return {}; - return m_style_spec.spans()[0].style; -} - -String CodeBlock::style_language() const -{ - if (m_style_spec.spans().is_empty()) - return {}; - return m_style_spec.spans()[0].text; -} - String CodeBlock::render_to_html() const { StringBuilder builder; - String style_language = this->style_language(); - Text::Style style = this->style(); - builder.append("
");
 
-    if (style.strong)
-        builder.append("");
-    if (style.emph)
-        builder.append("");
-
-    if (style_language.is_empty())
+    if (m_language.is_empty())
         builder.append("");
     else
-        builder.appendff("", escape_html_entities(style_language));
+        builder.appendff("", escape_html_entities(m_language));
 
-    if (style_language == "js")
+    if (m_language == "js")
         builder.append(JS::MarkupGenerator::html_from_source(m_code));
     else
         builder.append(escape_html_entities(m_code));
 
     builder.append("\n");
 
-    if (style.emph)
-        builder.append("");
-    if (style.strong)
-        builder.append("");
-
     builder.append("
\n"); return builder.build(); @@ -64,28 +37,7 @@ String CodeBlock::render_for_terminal(size_t) const { StringBuilder builder; - Text::Style style = this->style(); - bool needs_styling = style.strong || style.emph; - if (needs_styling) { - builder.append("\033["); - bool first = true; - if (style.strong) { - builder.append('1'); - first = false; - } - if (style.emph) { - if (!first) - builder.append(';'); - builder.append('4'); - } - builder.append('m'); - } - builder.append(m_code); - - if (needs_styling) - builder.append("\033[0m"); - builder.append("\n\n"); return builder.build(); @@ -102,21 +54,7 @@ OwnPtr CodeBlock::parse(Vector::ConstIterator& lines) if (!line.starts_with(tick_tick_tick)) return {}; - // Our Markdown extension: we allow - // specifying a style and a language - // for a code block, like so: - // - // ```**sh** - // $ echo hello friends! - // ```` - // - // The code block will be made bold, - // and if possible syntax-highlighted - // as appropriate for a shell script. 
StringView style_spec = line.substring_view(3, line.length() - 3); - auto spec = Text::parse(style_spec); - if (!spec.has_value()) - return {}; ++lines; @@ -136,7 +74,7 @@ OwnPtr CodeBlock::parse(Vector::ConstIterator& lines) first = false; } - return make(move(spec.value()), builder.build()); + return make(style_spec, builder.build()); } } diff --git a/Userland/Libraries/LibMarkdown/CodeBlock.h b/Userland/Libraries/LibMarkdown/CodeBlock.h index c17fb3333f..2882c1cd0d 100644 --- a/Userland/Libraries/LibMarkdown/CodeBlock.h +++ b/Userland/Libraries/LibMarkdown/CodeBlock.h @@ -14,9 +14,9 @@ namespace Markdown { class CodeBlock final : public Block { public: - CodeBlock(Text&& style_spec, const String& code) + CodeBlock(const String& language, const String& code) : m_code(move(code)) - , m_style_spec(move(style_spec)) + , m_language(language) { } virtual ~CodeBlock() override { } @@ -26,11 +26,8 @@ public: static OwnPtr parse(Vector::ConstIterator& lines); private: - String style_language() const; - Text::Style style() const; - String m_code; - Text m_style_spec; + String m_language; }; } diff --git a/Userland/Libraries/LibMarkdown/Document.cpp b/Userland/Libraries/LibMarkdown/Document.cpp index dd68d9d17a..3e91b9f67f 100644 --- a/Userland/Libraries/LibMarkdown/Document.cpp +++ b/Userland/Libraries/LibMarkdown/Document.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2019-2020, Sergey Bugaev + * Copyright (c) 2021, Peter Elliott * * SPDX-License-Identifier: BSD-2-Clause */ @@ -75,15 +76,16 @@ OwnPtr Document::parse(const StringView& str) auto lines = lines_vec.begin(); auto document = make(); auto& blocks = document->m_blocks; - NonnullOwnPtrVector paragraph_lines; + StringBuilder paragraph_text; auto flush_paragraph = [&] { - if (paragraph_lines.is_empty()) + if (paragraph_text.is_empty()) return; - auto paragraph = make(move(paragraph_lines)); + auto paragraph = make(Text::parse(paragraph_text.build())); document->m_blocks.append(move(paragraph)); - paragraph_lines.clear(); + 
paragraph_text.clear(); }; + while (true) { if (lines.is_end()) break; @@ -98,7 +100,7 @@ OwnPtr Document::parse(const StringView& str) || helper(lines, blocks) || helper(lines, blocks); if (any) { - if (!paragraph_lines.is_empty()) { + if (!paragraph_text.is_empty()) { auto last_block = document->m_blocks.take_last(); flush_paragraph(); document->m_blocks.append(move(last_block)); @@ -106,15 +108,11 @@ OwnPtr Document::parse(const StringView& str) continue; } - auto line = Paragraph::Line::parse(lines); - if (!line) - return {}; - - paragraph_lines.append(line.release_nonnull()); + paragraph_text.append(*lines++); + paragraph_text.append("\n"); } - if (!paragraph_lines.is_empty()) - flush_paragraph(); + flush_paragraph(); return document; } diff --git a/Userland/Libraries/LibMarkdown/Heading.cpp b/Userland/Libraries/LibMarkdown/Heading.cpp index 8b526debe4..6c97ccb8aa 100644 --- a/Userland/Libraries/LibMarkdown/Heading.cpp +++ b/Userland/Libraries/LibMarkdown/Heading.cpp @@ -53,10 +53,7 @@ OwnPtr Heading::parse(Vector::ConstIterator& lines) StringView title_view = line.substring_view(level + 1, line.length() - level - 1); auto text = Text::parse(title_view); - if (!text.has_value()) - return {}; - - auto heading = make(move(text.value()), level); + auto heading = make(move(text), level); ++lines; return heading; diff --git a/Userland/Libraries/LibMarkdown/List.cpp b/Userland/Libraries/LibMarkdown/List.cpp index 8162090f00..690753eaa9 100644 --- a/Userland/Libraries/LibMarkdown/List.cpp +++ b/Userland/Libraries/LibMarkdown/List.cpp @@ -59,10 +59,7 @@ OwnPtr List::parse(Vector::ConstIterator& lines) return true; auto text = Text::parse(item_builder.string_view()); - if (!text.has_value()) - return false; - - items.append(move(text.value())); + items.append(move(text)); item_builder.clear(); return true; diff --git a/Userland/Libraries/LibMarkdown/Paragraph.cpp b/Userland/Libraries/LibMarkdown/Paragraph.cpp index c90e016b11..c2c1111266 100644 --- 
a/Userland/Libraries/LibMarkdown/Paragraph.cpp +++ b/Userland/Libraries/LibMarkdown/Paragraph.cpp @@ -13,13 +13,7 @@ String Paragraph::render_to_html() const { StringBuilder builder; builder.append("

"); - bool first = true; - for (auto& line : m_lines) { - if (!first) - builder.append('\n'); - first = false; - builder.append(line.text().render_to_html().trim(" \t")); - } + builder.append(m_text.render_to_html()); builder.append("

\n"); return builder.build(); } @@ -27,26 +21,9 @@ String Paragraph::render_to_html() const String Paragraph::render_for_terminal(size_t) const { StringBuilder builder; - bool first = true; - for (auto& line : m_lines) { - if (!first) - builder.append(' '); - first = false; - builder.append(line.text().render_for_terminal()); - } + builder.append(m_text.render_for_terminal()); builder.append("\n\n"); return builder.build(); } -OwnPtr Paragraph::Line::parse(Vector::ConstIterator& lines) -{ - if (lines.is_end()) - return {}; - - auto text = Text::parse(*lines++); - if (!text.has_value()) - return {}; - - return make(text.release_value()); -} } diff --git a/Userland/Libraries/LibMarkdown/Paragraph.h b/Userland/Libraries/LibMarkdown/Paragraph.h index 4e58e69c2c..4b75e1cd67 100644 --- a/Userland/Libraries/LibMarkdown/Paragraph.h +++ b/Userland/Libraries/LibMarkdown/Paragraph.h @@ -15,22 +15,8 @@ namespace Markdown { class Paragraph final : public Block { public: - class Line { - public: - explicit Line(Text&& text) - : m_text(move(text)) - { - } - - static OwnPtr parse(Vector::ConstIterator& lines); - const Text& text() const { return m_text; } - - private: - Text m_text; - }; - - Paragraph(NonnullOwnPtrVector&& lines) - : m_lines(move(lines)) + Paragraph(Text text) + : m_text(move(text)) { } @@ -40,7 +26,7 @@ public: virtual String render_for_terminal(size_t view_width = 0) const override; private: - NonnullOwnPtrVector m_lines; + Text m_text; }; } diff --git a/Userland/Libraries/LibMarkdown/Table.cpp b/Userland/Libraries/LibMarkdown/Table.cpp index 6c08723123..99c41f6787 100644 --- a/Userland/Libraries/LibMarkdown/Table.cpp +++ b/Userland/Libraries/LibMarkdown/Table.cpp @@ -16,9 +16,7 @@ String Table::render_for_terminal(size_t view_width) const StringBuilder builder; auto write_aligned = [&](const auto& text, auto width, auto alignment) { - size_t original_length = 0; - for (auto& span : text.spans()) - original_length += span.text.length(); + size_t original_length 
= text.terminal_length(); auto string = text.render_for_terminal(); if (alignment == Alignment::Center) { auto padding_length = (width - original_length) / 2; @@ -137,11 +135,8 @@ OwnPtr Table::parse(Vector::ConstIterator& lines) table->m_columns.resize(header_delimiters.size()); for (size_t i = 0; i < header_segments.size(); ++i) { - auto text_option = Text::parse(header_segments[i]); - if (!text_option.has_value()) - return {}; // An invalid 'text' in the header should just fail the table parse. + auto text = Text::parse(header_segments[i]); - auto text = text_option.release_value(); auto& column = table->m_columns[i]; column.header = move(text); @@ -199,16 +194,10 @@ OwnPtr
Table::parse(Vector::ConstIterator& lines) if (i >= segments.size()) { // Ran out of segments, but still have headers. // Just make an empty cell. - table->m_columns[i].rows.append(Text { "" }); + table->m_columns[i].rows.append(Text::parse("")); } else { - auto text_option = Text::parse(segments[i]); - // We treat an invalid 'text' as a literal. - if (text_option.has_value()) { - auto text = text_option.release_value(); - table->m_columns[i].rows.append(move(text)); - } else { - table->m_columns[i].rows.append(Text { segments[i] }); - } + auto text = Text::parse(segments[i]); + table->m_columns[i].rows.append(move(text)); } } } diff --git a/Userland/Libraries/LibMarkdown/Text.cpp b/Userland/Libraries/LibMarkdown/Text.cpp index ab61792142..05b517cfe7 100644 --- a/Userland/Libraries/LibMarkdown/Text.cpp +++ b/Userland/Libraries/LibMarkdown/Text.cpp @@ -1,5 +1,6 @@ /* * Copyright (c) 2019-2020, Sergey Bugaev + * Copyright (c) 2021, Peter Elliott * * SPDX-License-Identifier: BSD-2-Clause */ @@ -8,268 +9,362 @@ #include #include #include +#include #include namespace Markdown { -static String unescape(const StringView& text) +void Text::EmphasisNode::render_to_html(StringBuilder& builder) const { - StringBuilder builder; - for (size_t i = 0; i < text.length(); ++i) { - if (text[i] == '\\' && i != text.length() - 1) { - builder.append(text[i + 1]); - i++; - continue; - } - builder.append(text[i]); - } - return builder.build(); + builder.append((strong) ? "" : ""); + child->render_to_html(builder); + builder.append((strong) ? "" : ""); } -Text::Text(String&& text) +void Text::EmphasisNode::render_for_terminal(StringBuilder&) const { - m_spans.append({ move(text), Style {} }); + // FIXME. 
+} + +size_t Text::EmphasisNode::terminal_length() const +{ + return child->terminal_length(); +} + +void Text::CodeNode::render_to_html(StringBuilder& builder) const +{ + builder.append(""); + code->render_to_html(builder); + builder.append(""); +} + +void Text::CodeNode::render_for_terminal(StringBuilder&) const +{ + // FIXME. +} + +size_t Text::CodeNode::terminal_length() const +{ + return code->terminal_length(); +} + +void Text::TextNode::render_to_html(StringBuilder& builder) const +{ + builder.append(escape_html_entities(text)); +} + +void Text::TextNode::render_for_terminal(StringBuilder&) const +{ + // FIXME. +} + +size_t Text::TextNode::terminal_length() const +{ + return text.length(); +} + +void Text::LinkNode::render_to_html(StringBuilder& builder) const +{ + if (is_image) { + builder.append("render_to_html(builder); + builder.append("\" alt=\""); + text->render_to_html(builder); + builder.append("\" >"); + } else { + builder.append("render_to_html(builder); + builder.append("\">"); + text->render_to_html(builder); + builder.append(""); + } +} + +void Text::LinkNode::render_for_terminal(StringBuilder&) const +{ + // FIXME. +} + +size_t Text::LinkNode::terminal_length() const +{ + return text->terminal_length(); +} + +void Text::MultiNode::render_to_html(StringBuilder& builder) const +{ + for (auto& child : children) { + child.render_to_html(builder); + } +} + +void Text::MultiNode::render_for_terminal(StringBuilder&) const +{ + // FIXME. 
+} + +size_t Text::MultiNode::terminal_length() const +{ + size_t length = 0; + for (auto& child : children) { + length += child.terminal_length(); + } + return length; +} + +size_t Text::terminal_length() const +{ + return m_node->terminal_length(); } String Text::render_to_html() const { StringBuilder builder; - - Vector open_tags; - Style current_style; - - for (auto& span : m_spans) { - struct TagAndFlag { - String tag; - bool Style::*flag; - }; - TagAndFlag tags_and_flags[] = { - { "em", &Style::emph }, - { "b", &Style::strong }, - { "code", &Style::code } - }; - auto it = open_tags.find_if([&](const String& open_tag) { - if (open_tag == "a" && current_style.href != span.style.href) - return true; - if (open_tag == "img" && current_style.img != span.style.img) - return true; - for (auto& tag_and_flag : tags_and_flags) { - if (open_tag == tag_and_flag.tag && !(span.style.*tag_and_flag.flag)) - return true; - } - return false; - }); - - if (!it.is_end()) { - // We found an open tag that should - // not be open for the new span. Close - // it and all the open tags that follow - // it. 
- for (ssize_t j = open_tags.size() - 1; j >= static_cast(it.index()); --j) { - auto& tag = open_tags[j]; - if (tag == "img") { - builder.append("\" />"); - current_style.img = {}; - continue; - } - builder.appendff("", tag); - if (tag == "a") { - current_style.href = {}; - continue; - } - for (auto& tag_and_flag : tags_and_flags) - if (tag == tag_and_flag.tag) - current_style.*tag_and_flag.flag = false; - } - open_tags.shrink(it.index()); - } - if (current_style.href.is_null() && !span.style.href.is_null()) { - open_tags.append("a"); - builder.appendff("", span.style.href); - } - if (current_style.img.is_null() && !span.style.img.is_null()) { - open_tags.append("img"); - builder.appendff("\"",", tag_and_flag.tag); - } - } - - current_style = span.style; - builder.append(escape_html_entities(span.text)); - } - - for (ssize_t i = open_tags.size() - 1; i >= 0; --i) { - auto& tag = open_tags[i]; - if (tag == "img") { - builder.append("\" />"); - continue; - } - builder.appendff("", tag); - } - - return builder.build(); + m_node->render_to_html(builder); + return builder.build().trim(" \n\t"); } String Text::render_for_terminal() const { StringBuilder builder; - - for (auto& span : m_spans) { - bool needs_styling = span.style.strong || span.style.emph || span.style.code; - if (needs_styling) { - builder.append("\033["); - bool first = true; - if (span.style.strong || span.style.code) { - builder.append('1'); - first = false; - } - if (span.style.emph) { - if (!first) - builder.append(';'); - builder.append('4'); - } - builder.append('m'); - } - - if (!span.style.href.is_null()) { - if (strstr(span.style.href.characters(), "://") != nullptr) { - builder.append("\033]8;;"); - builder.append(span.style.href); - builder.append("\033\\"); - } - } - - builder.append(span.text.characters()); - - if (needs_styling) - builder.append("\033[0m"); - - if (!span.style.href.is_null()) { - // When rendering for the terminal, ignore any - // non-absolute links, because the user has no 
- // chance to follow them anyway. - if (strstr(span.style.href.characters(), "://") != nullptr) { - builder.appendff(" <{}>", span.style.href); - builder.append("\033]8;;\033\\"); - } - } - if (!span.style.img.is_null()) { - if (strstr(span.style.img.characters(), "://") != nullptr) { - builder.appendff(" <{}>", span.style.img); - } - } - } - - return builder.build(); + m_node->render_for_terminal(builder); + return builder.build().trim(" \n\t"); } -Optional Text::parse(const StringView& str) +Text Text::parse(StringView const& str) { - Style current_style; - size_t current_span_start = 0; - int first_span_in_the_current_link = -1; - bool current_link_is_actually_img = false; - Vector spans; + Text text; + auto const tokens = tokenize(str); + auto iterator = tokens.begin(); + text.m_node = parse_sequence(iterator, false); + return text; +} - auto append_span_if_needed = [&](size_t offset) { - VERIFY(current_span_start <= offset); - if (current_span_start != offset) { - Span span { - unescape(str.substring_view(current_span_start, offset - current_span_start)), - current_style - }; - spans.append(move(span)); - current_span_start = offset; - } +Vector Text::tokenize(StringView const& str) +{ + Vector tokens; + StringBuilder current_token; + + auto flush_token = [&](bool left_flanking, bool right_flanking, bool is_run) { + if (current_token.is_empty()) + return; + + tokens.append({ + current_token.build(), + left_flanking, + right_flanking, + is_run, + }); + current_token.clear(); }; - for (size_t offset = 0; offset < str.length(); offset++) { + for (size_t offset = 0; offset < str.length(); ++offset) { + auto has = [&](StringView const& seq) { + if (offset + seq.length() > str.length()) + return false; + + return str.substring_view(offset, seq.length()) == seq; + }; + + auto expect = [&](StringView const& seq) { + VERIFY(has(seq)); + flush_token(false, false, false); + current_token.append(seq); + flush_token(false, false, false); + offset += seq.length() - 1; + }; 
+ char ch = str[offset]; - bool is_escape = ch == '\\'; - if (is_escape && offset != str.length() - 1) { - offset++; - continue; + if (ch == '\\' && offset + 1 < str.length()) { + current_token.append(str[offset + 1]); + ++offset; + } else if (ch == '*' || ch == '_' || ch == '`') { + flush_token(false, false, false); + + char delim = ch; + size_t run_offset; + for (run_offset = offset; run_offset < str.length() && str[run_offset] == delim; ++run_offset) { + current_token.append(str[run_offset]); + } + + bool left_flanking = run_offset < str.length() && !isspace(str[run_offset]); + bool right_flanking = offset > 0 && !isspace(str[offset - 1]); + flush_token(left_flanking, right_flanking, true); + offset = run_offset - 1; + + } else if (ch == '\n') { + flush_token(false, false, false); + current_token.append(ch); + flush_token(false, false, false); + } else if (has("[")) { + expect("["); + } else if (has("![")) { + expect("!["); + } else if (has("](")) { + expect("]("); + } else if (has(")")) { + expect(")"); + } else { + current_token.append(ch); + } + } + flush_token(false, false, false); + return tokens; +} + +NonnullOwnPtr Text::parse_sequence(Vector::ConstIterator& tokens, bool in_link) +{ + auto node = make(); + + for (; !tokens.is_end(); ++tokens) { + if (tokens->is_run) { + switch (tokens->run_char()) { + case '*': + case '_': + node->children.append(parse_emph(tokens, in_link)); + break; + case '`': + node->children.append(parse_code(tokens)); + break; + } + } else if (!in_link && (*tokens == "[" || *tokens == "![")) { + node->children.append(parse_link(tokens)); + } else if (in_link && *tokens == "](") { + return node; + } else { + node->children.append(make(tokens->data)); } - bool is_special_character = false; - is_special_character |= ch == '`'; - if (!current_style.code) - is_special_character |= ch == '*' || ch == '_' || ch == '[' || ch == ']' || (ch == '!' 
&& offset + 1 < str.length() && str[offset + 1] == '['); - if (!is_special_character) - continue; + if (in_link && !tokens.is_end() && *tokens == "](") + return node; - append_span_if_needed(offset); + if (tokens.is_end()) + break; + } + return node; +} - switch (ch) { - case '`': - current_style.code = !current_style.code; - break; - case '*': - case '_': - if (offset + 1 < str.length() && str[offset + 1] == ch) { - offset++; - current_style.strong = !current_style.strong; - } else { - current_style.emph = !current_style.emph; - } - break; - case '!': - current_link_is_actually_img = true; - break; - case '[': - if constexpr (MARKDOWN_DEBUG) { - if (first_span_in_the_current_link != -1) - dbgln("Dropping the outer link"); - } - first_span_in_the_current_link = spans.size(); - break; - case ']': { - if (first_span_in_the_current_link == -1) { - dbgln_if(MARKDOWN_DEBUG, "Unmatched ]"); - continue; - } - ScopeGuard guard = [&] { - first_span_in_the_current_link = -1; - current_link_is_actually_img = false; - }; - if (offset + 2 >= str.length() || str[offset + 1] != '(') - continue; - offset += 2; - size_t start_of_href = offset; +bool Text::can_open(Token const& opening) +{ + return (opening.run_char() == '*' && opening.left_flanking) || (opening.run_char() == '_' && opening.left_flanking && !opening.right_flanking); +} - do - offset++; - while (offset < str.length() && str[offset] != ')'); - if (offset == str.length()) - offset--; +bool Text::can_close_for(Token const& opening, Text::Token const& closing) +{ + if (opening.run_char() != closing.run_char()) + return false; - const StringView href = str.substring_view(start_of_href, offset - start_of_href); - for (size_t i = first_span_in_the_current_link; i < spans.size(); i++) { - if (current_link_is_actually_img) - spans[i].style.img = href; - else - spans[i].style.href = href; + if (opening.run_length() != closing.run_length()) + return false; + + return (opening.run_char() == '*' && closing.right_flanking) || 
(opening.run_char() == '_' && !closing.left_flanking && closing.right_flanking); +} + +NonnullOwnPtr Text::parse_emph(Vector::ConstIterator& tokens, bool in_link) +{ + auto opening = *tokens; + + // Check that the opening delimiter run is properly flanking. + if (!can_open(opening)) + return make(opening.data); + + auto child = make(); + for (++tokens; !tokens.is_end(); ++tokens) { + if (tokens->is_run) { + if (can_close_for(opening, *tokens)) { + return make(opening.run_length() >= 2, move(child)); } - break; - } - default: - VERIFY_NOT_REACHED(); + + switch (tokens->run_char()) { + case '*': + case '_': + child->children.append(parse_emph(tokens, in_link)); + break; + case '`': + child->children.append(parse_code(tokens)); + break; + } + } else if (*tokens == "[" || *tokens == "![") { + child->children.append(parse_link(tokens)); + } else if (in_link && *tokens == "](") { + child->children.prepend(make(opening.data)); + return child; + } else { + child->children.append(make(tokens->data)); } - // We've processed the character as a special, so the next offset will - // start after it. Note that explicit continue statements skip over this - // line, effectively treating the character as not special. - current_span_start = offset + 1; + if (in_link && !tokens.is_end() && *tokens == "](") { + child->children.prepend(make(opening.data)); + return child; + } + + if (tokens.is_end()) + break; + } + child->children.prepend(make(opening.data)); + return child; +} + +NonnullOwnPtr Text::parse_code(Vector::ConstIterator& tokens) +{ + auto opening = *tokens; + + auto is_closing = [&](Token const& token) { + return token.is_run && token.run_char() == '`' && token.run_length() == opening.run_length(); + }; + + bool is_all_whitespace = true; + auto code = make(); + for (auto iterator = tokens + 1; !iterator.is_end(); ++iterator) { + if (is_closing(*iterator)) { + tokens = iterator; + + // Strip first and last space, when appropriate. 
+ if (!is_all_whitespace) { + auto& first = dynamic_cast(code->children.first()); + auto& last = dynamic_cast(code->children.last()); + if (first.text.starts_with(" ") && last.text.ends_with(" ")) { + first.text = first.text.substring(1); + last.text = last.text.substring(0, last.text.length() - 1); + } + } + + return make(move(code)); + } + + is_all_whitespace = is_all_whitespace && iterator->data.is_whitespace(); + code->children.append(make((*iterator == "\n") ? " " : iterator->data)); } - append_span_if_needed(str.length()); - - return Text(move(spans)); + return make(opening.data); } +NonnullOwnPtr Text::parse_link(Vector::ConstIterator& tokens) +{ + auto opening = *tokens++; + bool is_image = opening == "!["; + + auto link_text = parse_sequence(tokens, true); + + if (tokens.is_end() || *tokens != "](") { + link_text->children.prepend(make(opening.data)); + return link_text; + } + auto seperator = *tokens; + VERIFY(seperator == "]("); + + auto address = make(); + for (auto iterator = tokens + 1; !iterator.is_end(); ++iterator) { + if (*iterator == ")") { + tokens = iterator; + return make(is_image, move(link_text), move(address)); + } + + address->children.append(make(iterator->data)); + } + + link_text->children.prepend(make(opening.data)); + link_text->children.append(make(seperator.data)); + return link_text; +} } diff --git a/Userland/Libraries/LibMarkdown/Text.h b/Userland/Libraries/LibMarkdown/Text.h index 4d21baad4a..0543b4d3e1 100644 --- a/Userland/Libraries/LibMarkdown/Text.h +++ b/Userland/Libraries/LibMarkdown/Text.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2019-2020, Sergey Bugaev + * Copyright (c) 2021, Peter Elliott * * SPDX-License-Identifier: BSD-2-Clause */ @@ -7,48 +8,138 @@ #pragma once #include +#include +#include #include -#include namespace Markdown { class Text final { - AK_MAKE_NONCOPYABLE(Text); - public: - struct Style { - bool emph { false }; - bool strong { false }; - bool code { false }; - String href; - String img; + class Node { + 
public: + virtual void render_to_html(StringBuilder& builder) const = 0; + virtual void render_for_terminal(StringBuilder& builder) const = 0; + virtual size_t terminal_length() const = 0; + + virtual ~Node() { } }; - struct Span { + class EmphasisNode : public Node { + public: + bool strong; + NonnullOwnPtr child; + + EmphasisNode(bool strong, NonnullOwnPtr child) + : strong(strong) + , child(move(child)) + { + } + + virtual void render_to_html(StringBuilder& builder) const override; + virtual void render_for_terminal(StringBuilder& builder) const override; + virtual size_t terminal_length() const override; + }; + + class CodeNode : public Node { + public: + NonnullOwnPtr code; + + CodeNode(NonnullOwnPtr code) + : code(move(code)) + { + } + + virtual void render_to_html(StringBuilder& builder) const override; + virtual void render_for_terminal(StringBuilder& builder) const override; + virtual size_t terminal_length() const override; + }; + + class TextNode : public Node { + public: String text; - Style style; + + TextNode(StringView const& text) + : text(text) + { + } + + virtual void render_to_html(StringBuilder& builder) const override; + virtual void render_for_terminal(StringBuilder& builder) const override; + virtual size_t terminal_length() const override; }; - explicit Text(String&& text); - Text(Text&& text) = default; - Text() = default; + class LinkNode : public Node { + public: + bool is_image; + NonnullOwnPtr text; + NonnullOwnPtr href; - Text& operator=(Text&&) = default; + LinkNode(bool is_image, NonnullOwnPtr text, NonnullOwnPtr href) + : is_image(is_image) + , text(move(text)) + , href(move(href)) + { + } - const Vector& spans() const { return m_spans; } + virtual void render_to_html(StringBuilder& builder) const override; + virtual void render_for_terminal(StringBuilder& builder) const override; + virtual size_t terminal_length() const override; + }; + + class MultiNode : public Node { + public: + NonnullOwnPtrVector children; + + virtual void 
render_to_html(StringBuilder& builder) const override; + virtual void render_for_terminal(StringBuilder& builder) const override; + virtual size_t terminal_length() const override; + }; + + size_t terminal_length() const; String render_to_html() const; String render_for_terminal() const; - static Optional parse(const StringView&); + static Text parse(StringView const&); private: - Text(Vector&& spans) - : m_spans(move(spans)) - { - } + struct Token { + String data; + // Flanking basically means that a delimiter run has a non-whitespace, + // non-punctuation character on the corresponding side. For a more exact + // definition, see the CommonMark spec. + bool left_flanking; + bool right_flanking; + // is_run indicates that this token is a 'delimiter run'. A delimiter + // run occurs when several of the same syntactical character ('`', '_', + // or '*') occur in a row. + bool is_run; - Vector m_spans; + char run_char() const + { + VERIFY(is_run); + return data[0]; + } + char run_length() const + { + VERIFY(is_run); + return data.length(); + } + bool operator==(StringView const& str) const { return str == data; } + }; + + static Vector tokenize(StringView const&); + + static bool can_open(Token const& opening); + static bool can_close_for(Token const& opening, Token const& closing); + + static NonnullOwnPtr parse_sequence(Vector::ConstIterator& tokens, bool in_link); + static NonnullOwnPtr parse_emph(Vector::ConstIterator& tokens, bool in_link); + static NonnullOwnPtr parse_code(Vector::ConstIterator& tokens); + static NonnullOwnPtr parse_link(Vector::ConstIterator& tokens); + + OwnPtr m_node; }; }