mirror of
				https://github.com/RGBCube/serenity
				synced 2025-10-31 15:32:46 +00:00 
			
		
		
		
	markdown-checker: New tool that checks document links
This commit is contained in:
		
							parent
							
								
									50ad294527
								
							
						
					
					
						commit
						3f88d65b78
					
				
					 3 changed files with 248 additions and 0 deletions
				
			
		|  | @ -436,6 +436,10 @@ if (BUILD_LAGOM) | |||
|         set_target_properties(js_lagom PROPERTIES OUTPUT_NAME js) | ||||
|         target_link_libraries(js_lagom LagomJS LagomLine Threads::Threads) | ||||
| 
 | ||||
|         add_executable(markdown-check_lagom ../../Userland/Utilities/markdown-check.cpp) | ||||
|         set_target_properties(markdown-check_lagom PROPERTIES OUTPUT_NAME markdown-check) | ||||
|         target_link_libraries(markdown-check_lagom LagomMarkdown) | ||||
| 
 | ||||
|         add_executable(ntpquery_lagom ../../Userland/Utilities/ntpquery.cpp) | ||||
|         set_target_properties(ntpquery_lagom PROPERTIES OUTPUT_NAME ntpquery) | ||||
|         target_link_libraries(ntpquery_lagom LagomCore) | ||||
|  |  | |||
|  | @ -73,6 +73,7 @@ target_link_libraries(keymap LibKeyboard) | |||
| target_link_libraries(lspci LibPCIDB) | ||||
| target_link_libraries(lsusb LibUSBDB) | ||||
| target_link_libraries(man LibMarkdown) | ||||
| target_link_libraries(markdown-check LibMarkdown) | ||||
| target_link_libraries(matroska LibVideo) | ||||
| target_link_libraries(md LibMarkdown) | ||||
| target_link_libraries(misbehaving-application LibCore) | ||||
|  |  | |||
							
								
								
									
										243
									
								
								Userland/Utilities/markdown-check.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										243
									
								
								Userland/Utilities/markdown-check.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,243 @@ | |||
| /*
 | ||||
|  * Copyright (c) 2021, Ben Wiederhake <BenWiederhake.GitHub@gmx.de> | ||||
|  * | ||||
|  * SPDX-License-Identifier: BSD-2-Clause | ||||
|  */ | ||||
| 
 | ||||
| /*
 | ||||
|  * You may want to invoke the checker like this: | ||||
|  * $ cd Build/lagom | ||||
|  * $ ninja | ||||
|  * $ find ../../AK ../../Base ../../Documentation/ ../../Kernel/ ../../Meta/ ../../Ports/ ../../Tests/ ../../Userland/ -type f -name '*.md' | xargs ./markdown-check ../../README.md | ||||
|  */ | ||||
| 
 | ||||
| #include <AK/Format.h> | ||||
| #include <AK/HashMap.h> | ||||
| #include <AK/HashTable.h> | ||||
| #include <AK/LexicalPath.h> | ||||
| #include <AK/OwnPtr.h> | ||||
| #include <AK/StdLibExtras.h> | ||||
| #include <AK/Vector.h> | ||||
| #include <LibCore/File.h> | ||||
| #include <LibMarkdown/Document.h> | ||||
| #include <LibMarkdown/Visitor.h> | ||||
| 
 | ||||
| struct FileLink { | ||||
|     String file_path; // May be empty, but not null
 | ||||
|     String anchor;    // May be null ("foo.md", "bar.png"), may be empty ("baz.md#")
 | ||||
|     String label;     // May be empty, but not null
 | ||||
| }; | ||||
| 
 | ||||
| class MarkdownLinkage final : Markdown::Visitor { | ||||
| public: | ||||
|     ~MarkdownLinkage() = default; | ||||
| 
 | ||||
|     static MarkdownLinkage analyze(Markdown::Document const&); | ||||
| 
 | ||||
|     bool has_anchor(String const& anchor) const { return m_anchors.contains(anchor); } | ||||
|     HashTable<String> const& anchors() const { return m_anchors; } | ||||
|     Vector<FileLink> const& file_links() const { return m_file_links; } | ||||
| 
 | ||||
| private: | ||||
|     MarkdownLinkage() = default; | ||||
| 
 | ||||
|     virtual RecursionDecision visit(Markdown::Heading const&) override; | ||||
|     virtual RecursionDecision visit(Markdown::Text::LinkNode const&) override; | ||||
| 
 | ||||
|     HashTable<String> m_anchors; | ||||
|     Vector<FileLink> m_file_links; | ||||
| }; | ||||
| 
 | ||||
| MarkdownLinkage MarkdownLinkage::analyze(Markdown::Document const& document) | ||||
| { | ||||
|     MarkdownLinkage linkage; | ||||
| 
 | ||||
|     document.walk(linkage); | ||||
| 
 | ||||
|     return linkage; | ||||
| } | ||||
| 
 | ||||
| class StringCollector final : Markdown::Visitor { | ||||
| public: | ||||
|     StringCollector() = default; | ||||
|     virtual ~StringCollector() = default; | ||||
| 
 | ||||
|     String build() { return m_builder.build(); } | ||||
| 
 | ||||
|     static String from(Markdown::Heading const& heading) | ||||
|     { | ||||
|         StringCollector collector; | ||||
|         heading.walk(collector); | ||||
|         return collector.build(); | ||||
|     } | ||||
| 
 | ||||
|     static String from(Markdown::Text::Node const& node) | ||||
|     { | ||||
|         StringCollector collector; | ||||
|         node.walk(collector); | ||||
|         return collector.build(); | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
|     virtual RecursionDecision visit(String const& text) override | ||||
|     { | ||||
|         m_builder.append(text); | ||||
|         return RecursionDecision::Recurse; | ||||
|     } | ||||
| 
 | ||||
|     StringBuilder m_builder; | ||||
| }; | ||||
| 
 | ||||
| static String slugify(String const& text) | ||||
| { | ||||
|     // TODO: This feels like it belongs into LibWeb.
 | ||||
|     String slug = text.to_lowercase(); | ||||
|     // Reverse-engineered through github, using:
 | ||||
|     // find AK/ Base/ Documentation/ Kernel/ Meta/ Ports/ Tests/ Userland/ -name '*.md' | xargs grep --color=always -Pin '^##+ .*[^a-z0-9 ?()`_:/!&|.$'"'"',<>"+-]' README.md
 | ||||
|     slug = slug.replace(" ", "-", true) | ||||
|                .replace("!", "", true) | ||||
|                .replace("?", "", true) | ||||
|                .replace("(", "", true) | ||||
|                .replace(")", "", true) | ||||
|                .replace(":", "", true) | ||||
|                .replace("/", "-", true) | ||||
|                .replace("&", "", true) | ||||
|                .replace("|", "", true) | ||||
|                .replace(".", "", true) | ||||
|                .replace("$", "", true) | ||||
|                .replace("'", "", true) | ||||
|                .replace(",", "", true) | ||||
|                .replace("\"", "", true) | ||||
|                .replace("+", "", true) | ||||
|                .replace("\\", "", true) | ||||
|                .replace("<", "", true) | ||||
|                .replace(">", "", true); | ||||
|     // What about "="?
 | ||||
|     return slug; | ||||
| } | ||||
| 
 | ||||
| RecursionDecision MarkdownLinkage::visit(Markdown::Heading const& heading) | ||||
| { | ||||
|     m_anchors.set(slugify(StringCollector::from(heading))); | ||||
|     return RecursionDecision::Recurse; | ||||
| } | ||||
| 
 | ||||
| RecursionDecision MarkdownLinkage::visit(Markdown::Text::LinkNode const& link_node) | ||||
| { | ||||
|     String const& href = link_node.href; | ||||
|     if (href.is_null()) { | ||||
|         // Nothing to do here.
 | ||||
|         return RecursionDecision::Recurse; | ||||
|     } | ||||
|     if (href.starts_with("https://") || href.starts_with("http://")) { | ||||
|         outln("Not checking external link {}", href); | ||||
|         return RecursionDecision::Recurse; | ||||
|     } | ||||
|     if (href.starts_with("file://")) { | ||||
|         // TODO: Resolve relative to $SERENITY_SOURCE_DIR/Base/
 | ||||
|         // Currently, this affects only one link, so it's not worth the effort.
 | ||||
|         outln("Not checking local link {}", href); | ||||
|         return RecursionDecision::Recurse; | ||||
|     } | ||||
| 
 | ||||
|     String label = StringCollector::from(*link_node.text); | ||||
|     Optional<size_t> last_hash = href.find_last('#'); | ||||
|     if (last_hash.has_value()) { | ||||
|         m_file_links.append({ href.substring(0, last_hash.value()), href.substring(last_hash.value() + 1), label }); | ||||
|     } else { | ||||
|         m_file_links.append({ href, String(), label }); | ||||
|     } | ||||
| 
 | ||||
|     return RecursionDecision::Recurse; | ||||
| } | ||||
| 
 | ||||
| int main(int argc, char** argv) | ||||
| { | ||||
|     if (argc < 2) { | ||||
|         // Technically it is valid to call this program with zero markdown files: When there are
 | ||||
|         // no files, there are no dead links. However, any such usage is probably erroneous.
 | ||||
|         warnln("Usage: {} Foo.md Bar.md ...", argv[0]); | ||||
|         // E.g.: find AK/ Base/ Documentation/ Kernel/ Meta/ Ports/ Tests/ Userland/ -name '*.md' -print0 | xargs -0 ./MarkdownCheck
 | ||||
|         return 1; | ||||
|     } | ||||
| 
 | ||||
|     outln("Reading and parsing Markdown files ..."); | ||||
|     HashMap<String, MarkdownLinkage> files; | ||||
|     for (int i = 1; i < argc; ++i) { | ||||
|         auto path = argv[i]; | ||||
|         auto file_or_error = Core::File::open(path, Core::OpenMode::ReadOnly); | ||||
|         if (file_or_error.is_error()) { | ||||
|             warnln("Failed to read {}: {}", path, file_or_error.error()); | ||||
|             // Since this should never happen anyway, fail early.
 | ||||
|             return 1; | ||||
|         } | ||||
|         auto file = file_or_error.release_value(); | ||||
|         auto content_buffer = file->read_all(); | ||||
|         auto content = StringView(content_buffer); | ||||
|         auto document = Markdown::Document::parse(content); | ||||
|         if (!document) { | ||||
|             warnln("Failed to parse {} due to an unspecified error.", path); | ||||
|             // Since this should never happen anyway, fail early.
 | ||||
|             return 1; | ||||
|         } | ||||
|         files.set(Core::File::real_path_for(path), MarkdownLinkage::analyze(*document)); | ||||
|     } | ||||
| 
 | ||||
|     outln("Checking links ..."); | ||||
|     bool any_problems = false; | ||||
|     for (auto const& file_item : files) { | ||||
|         auto file_lexical_path = LexicalPath(file_item.key); | ||||
|         auto file_dir = file_lexical_path.dirname(); | ||||
|         for (auto const& file_link : file_item.value.file_links()) { | ||||
|             String pointee_file; | ||||
|             if (file_link.file_path.is_empty()) { | ||||
|                 pointee_file = file_item.key; | ||||
|             } else { | ||||
|                 pointee_file = LexicalPath::absolute_path(file_dir, file_link.file_path); | ||||
|             } | ||||
|             if (!Core::File::exists(pointee_file)) { | ||||
|                 outln("File '{}' points to '{}' (label '{}'), but '{}' does not exist!", | ||||
|                     file_item.key, file_link.file_path, file_link.label, pointee_file); | ||||
|                 any_problems = true; | ||||
|                 continue; | ||||
|             } | ||||
|             if (file_link.anchor.is_empty()) { | ||||
|                 // No anchor to test for.
 | ||||
|                 continue; | ||||
|             } | ||||
| 
 | ||||
|             auto pointee_linkage = files.find(pointee_file); | ||||
|             if (pointee_linkage == files.end()) { | ||||
|                 outln("File '{}' points to file '{}', which exists, but was not scanned. Add it to the command-line arguments and re-run.", | ||||
|                     file_item.key, pointee_file); | ||||
|                 any_problems = true; | ||||
|                 continue; | ||||
|             } | ||||
| 
 | ||||
|             if (!pointee_linkage->value.has_anchor(file_link.anchor)) { | ||||
|                 outln("File '{}' points to '{}#{}' (label '{}'), but file '{}' does not have any heading that results in the anchor '{}'.", | ||||
|                     file_item.key, file_link.file_path, file_link.anchor, file_link.label, pointee_file, file_link.anchor); | ||||
|                 out("    The following anchors seem to be available:\n    "); | ||||
|                 bool any_anchors = false; | ||||
|                 for (auto const& anchor : pointee_linkage->value.anchors()) { | ||||
|                     if (any_anchors) | ||||
|                         out(", "); | ||||
|                     out("'{}'", anchor); | ||||
|                     any_anchors = true; | ||||
|                 } | ||||
|                 if (!any_anchors) | ||||
|                     out("(none)"); | ||||
|                 outln(); | ||||
|                 any_problems = true; | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     if (any_problems) { | ||||
|         outln("Done. Some errors were encountered, please check above log."); | ||||
|         return 1; | ||||
|     } else { | ||||
|         outln("Done. No problems detected."); | ||||
|         return 0; | ||||
|     } | ||||
| } | ||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Ben Wiederhake
						Ben Wiederhake