diff --git a/Meta/Lagom/CMakeLists.txt b/Meta/Lagom/CMakeLists.txt index 20c81cb3e0..f9ec5e77c2 100644 --- a/Meta/Lagom/CMakeLists.txt +++ b/Meta/Lagom/CMakeLists.txt @@ -436,6 +436,10 @@ if (BUILD_LAGOM) set_target_properties(js_lagom PROPERTIES OUTPUT_NAME js) target_link_libraries(js_lagom LagomJS LagomLine Threads::Threads) + add_executable(markdown-check_lagom ../../Userland/Utilities/markdown-check.cpp) + set_target_properties(markdown-check_lagom PROPERTIES OUTPUT_NAME markdown-check) + target_link_libraries(markdown-check_lagom LagomMarkdown) + add_executable(ntpquery_lagom ../../Userland/Utilities/ntpquery.cpp) set_target_properties(ntpquery_lagom PROPERTIES OUTPUT_NAME ntpquery) target_link_libraries(ntpquery_lagom LagomCore) diff --git a/Userland/Utilities/CMakeLists.txt b/Userland/Utilities/CMakeLists.txt index 82c2f340c8..c852826a6a 100644 --- a/Userland/Utilities/CMakeLists.txt +++ b/Userland/Utilities/CMakeLists.txt @@ -73,6 +73,7 @@ target_link_libraries(keymap LibKeyboard) target_link_libraries(lspci LibPCIDB) target_link_libraries(lsusb LibUSBDB) target_link_libraries(man LibMarkdown) +target_link_libraries(markdown-check LibMarkdown) target_link_libraries(matroska LibVideo) target_link_libraries(md LibMarkdown) target_link_libraries(misbehaving-application LibCore) diff --git a/Userland/Utilities/markdown-check.cpp b/Userland/Utilities/markdown-check.cpp new file mode 100644 index 0000000000..1fdfa3435d --- /dev/null +++ b/Userland/Utilities/markdown-check.cpp @@ -0,0 +1,243 @@ +/* + * Copyright (c) 2021, Ben Wiederhake + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +/* + * You may want to invoke the checker like this: + * $ cd Build/lagom + * $ ninja + * $ find ../../AK ../../Base ../../Documentation/ ../../Kernel/ ../../Meta/ ../../Ports/ ../../Tests/ ../../Userland/ -type f -name '*.md' | xargs ./markdown-check ../../README.md + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct FileLink { + String file_path; // May be empty, but not null + String anchor; // May be null ("foo.md", "bar.png"), may be empty ("baz.md#") + String label; // May be empty, but not null +}; + +class MarkdownLinkage final : Markdown::Visitor { +public: + ~MarkdownLinkage() = default; + + static MarkdownLinkage analyze(Markdown::Document const&); + + bool has_anchor(String const& anchor) const { return m_anchors.contains(anchor); } + HashTable const& anchors() const { return m_anchors; } + Vector const& file_links() const { return m_file_links; } + +private: + MarkdownLinkage() = default; + + virtual RecursionDecision visit(Markdown::Heading const&) override; + virtual RecursionDecision visit(Markdown::Text::LinkNode const&) override; + + HashTable m_anchors; + Vector m_file_links; +}; + +MarkdownLinkage MarkdownLinkage::analyze(Markdown::Document const& document) +{ + MarkdownLinkage linkage; + + document.walk(linkage); + + return linkage; +} + +class StringCollector final : Markdown::Visitor { +public: + StringCollector() = default; + virtual ~StringCollector() = default; + + String build() { return m_builder.build(); } + + static String from(Markdown::Heading const& heading) + { + StringCollector collector; + heading.walk(collector); + return collector.build(); + } + + static String from(Markdown::Text::Node const& node) + { + StringCollector collector; + node.walk(collector); + return collector.build(); + } + +private: + virtual RecursionDecision visit(String const& text) override + { + m_builder.append(text); + return RecursionDecision::Recurse; + } + + StringBuilder m_builder; +}; + +static String slugify(String const& text) +{ + // TODO: This feels like it belongs into LibWeb. + String slug = text.to_lowercase(); + // Reverse-engineered through github, using: + // find AK/ Base/ Documentation/ Kernel/ Meta/ Ports/ Tests/ Userland/ -name '*.md' | xargs grep --color=always -Pin '^##+ .*[^a-z0-9 ?()`_:/!&|.$'"'"',<>"+-]' README.md + slug = slug.replace(" ", "-", true) + .replace("!", "", true) + .replace("?", "", true) + .replace("(", "", true) + .replace(")", "", true) + .replace(":", "", true) + .replace("/", "-", true) + .replace("&", "", true) + .replace("|", "", true) + .replace(".", "", true) + .replace("$", "", true) + .replace("'", "", true) + .replace(",", "", true) + .replace("\"", "", true) + .replace("+", "", true) + .replace("\\", "", true) + .replace("<", "", true) + .replace(">", "", true); + // What about "="? + return slug; +} + +RecursionDecision MarkdownLinkage::visit(Markdown::Heading const& heading) +{ + m_anchors.set(slugify(StringCollector::from(heading))); + return RecursionDecision::Recurse; +} + +RecursionDecision MarkdownLinkage::visit(Markdown::Text::LinkNode const& link_node) +{ + String const& href = link_node.href; + if (href.is_null()) { + // Nothing to do here. + return RecursionDecision::Recurse; + } + if (href.starts_with("https://") || href.starts_with("http://")) { + outln("Not checking external link {}", href); + return RecursionDecision::Recurse; + } + if (href.starts_with("file://")) { + // TODO: Resolve relative to $SERENITY_SOURCE_DIR/Base/ + // Currently, this affects only one link, so it's not worth the effort. + outln("Not checking local link {}", href); + return RecursionDecision::Recurse; + } + + String label = StringCollector::from(*link_node.text); + Optional last_hash = href.find_last('#'); + if (last_hash.has_value()) { + m_file_links.append({ href.substring(0, last_hash.value()), href.substring(last_hash.value() + 1), label }); + } else { + m_file_links.append({ href, String(), label }); + } + + return RecursionDecision::Recurse; +} + +int main(int argc, char** argv) +{ + if (argc < 2) { + // Technically it is valid to call this program with zero markdown files: When there are + // no files, there are no dead links. However, any such usage is probably erroneous. + warnln("Usage: {} Foo.md Bar.md ...", argv[0]); + // E.g.: find AK/ Base/ Documentation/ Kernel/ Meta/ Ports/ Tests/ Userland/ -name '*.md' -print0 | xargs -0 ./MarkdownCheck + return 1; + } + + outln("Reading and parsing Markdown files ..."); + HashMap files; + for (int i = 1; i < argc; ++i) { + auto path = argv[i]; + auto file_or_error = Core::File::open(path, Core::OpenMode::ReadOnly); + if (file_or_error.is_error()) { + warnln("Failed to read {}: {}", path, file_or_error.error()); + // Since this should never happen anyway, fail early. + return 1; + } + auto file = file_or_error.release_value(); + auto content_buffer = file->read_all(); + auto content = StringView(content_buffer); + auto document = Markdown::Document::parse(content); + if (!document) { + warnln("Failed to parse {} due to an unspecified error.", path); + // Since this should never happen anyway, fail early. + return 1; + } + files.set(Core::File::real_path_for(path), MarkdownLinkage::analyze(*document)); + } + + outln("Checking links ..."); + bool any_problems = false; + for (auto const& file_item : files) { + auto file_lexical_path = LexicalPath(file_item.key); + auto file_dir = file_lexical_path.dirname(); + for (auto const& file_link : file_item.value.file_links()) { + String pointee_file; + if (file_link.file_path.is_empty()) { + pointee_file = file_item.key; + } else { + pointee_file = LexicalPath::absolute_path(file_dir, file_link.file_path); + } + if (!Core::File::exists(pointee_file)) { + outln("File '{}' points to '{}' (label '{}'), but '{}' does not exist!", + file_item.key, file_link.file_path, file_link.label, pointee_file); + any_problems = true; + continue; + } + if (file_link.anchor.is_empty()) { + // No anchor to test for. + continue; + } + + auto pointee_linkage = files.find(pointee_file); + if (pointee_linkage == files.end()) { + outln("File '{}' points to file '{}', which exists, but was not scanned. Add it to the command-line arguments and re-run.", + file_item.key, pointee_file); + any_problems = true; + continue; + } + + if (!pointee_linkage->value.has_anchor(file_link.anchor)) { + outln("File '{}' points to '{}#{}' (label '{}'), but file '{}' does not have any heading that results in the anchor '{}'.", + file_item.key, file_link.file_path, file_link.anchor, file_link.label, pointee_file, file_link.anchor); + out(" The following anchors seem to be available:\n "); + bool any_anchors = false; + for (auto const& anchor : pointee_linkage->value.anchors()) { + if (any_anchors) + out(", "); + out("'{}'", anchor); + any_anchors = true; + } + if (!any_anchors) + out("(none)"); + outln(); + any_problems = true; + } + } + } + + if (any_problems) { + outln("Done. Some errors were encountered, please check above log."); + return 1; + } else { + outln("Done. No problems detected."); + return 0; + } +}