Serenity Operating System
at master 341 lines 13 kB view raw
1/* 2 * Copyright (c) 2021, Ben Wiederhake <BenWiederhake.GitHub@gmx.de> 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 */ 6 7/* 8 * You may want to invoke the checker like this: 9 * $ cd Build/lagom 10 * $ ninja 11 * $ find ../../AK ../../Base ../../Documentation/ ../../Kernel/ ../../Meta/ ../../Ports/ ../../Tests/ ../../Userland/ -type f -name '*.md' | xargs ./markdown-check ../../README.md 12 */ 13 14#include <AK/Format.h> 15#include <AK/HashMap.h> 16#include <AK/HashTable.h> 17#include <AK/LexicalPath.h> 18#include <AK/RecursionDecision.h> 19#include <AK/URL.h> 20#include <AK/Vector.h> 21#include <LibCore/ArgsParser.h> 22#include <LibCore/DeprecatedFile.h> 23#include <LibCore/File.h> 24#include <LibMain/Main.h> 25#include <LibMarkdown/Document.h> 26#include <LibMarkdown/Visitor.h> 27#include <stdlib.h> 28 29static bool is_missing_file_acceptable(DeprecatedString const& filename) 30{ 31 const StringView acceptable_missing_files[] = { 32 // FIXME: Please write these manpages! 33 "/usr/share/man/man2/accept.md"sv, 34 "/usr/share/man/man2/exec.md"sv, 35 "/usr/share/man/man2/fcntl.md"sv, 36 "/usr/share/man/man2/fork.md"sv, 37 "/usr/share/man/man2/ioctl.md"sv, 38 "/usr/share/man/man2/listen.md"sv, 39 "/usr/share/man/man2/mmap.md"sv, 40 "/usr/share/man/man2/mprotect.md"sv, 41 "/usr/share/man/man2/open.md"sv, 42 "/usr/share/man/man2/ptrace.md"sv, 43 "/usr/share/man/man5/perfcore.md"sv, 44 // These ones are okay: 45 "/home/anon/Tests/js-tests/test-common.js"sv, 46 "/man1/index.html"sv, 47 "/man2/index.html"sv, 48 "/man3/index.html"sv, 49 "/man4/index.html"sv, 50 "/man5/index.html"sv, 51 "/man6/index.html"sv, 52 "/man7/index.html"sv, 53 "/man8/index.html"sv, 54 "index.html"sv, 55 }; 56 for (auto const& suffix : acceptable_missing_files) { 57 if (filename.ends_with(suffix)) 58 return true; 59 } 60 return false; 61} 62 63struct FileLink { 64 DeprecatedString file_path; // May be empty, but not null 65 DeprecatedString anchor; // May be null ("foo.md", "bar.png"), may be empty ("baz.md#") 66 DeprecatedString label; // May be empty, but not null 67}; 68 69class MarkdownLinkage final : Markdown::Visitor { 70public: 71 ~MarkdownLinkage() = default; 72 73 static MarkdownLinkage analyze(Markdown::Document const&); 74 75 bool has_anchor(DeprecatedString const& anchor) const { return m_anchors.contains(anchor); } 76 HashTable<DeprecatedString> const& anchors() const { return m_anchors; } 77 bool has_invalid_link() const { return m_has_invalid_link; } 78 Vector<FileLink> const& file_links() const { return m_file_links; } 79 80private: 81 MarkdownLinkage() 82 { 83 auto const* source_directory = getenv("SERENITY_SOURCE_DIR"); 84 if (source_directory != nullptr) { 85 m_serenity_source_directory = source_directory; 86 } else { 87 warnln("The environment variable SERENITY_SOURCE_DIR was not found. Link checking inside Serenity's filesystem will fail."); 88 } 89 } 90 91 virtual RecursionDecision visit(Markdown::Heading const&) override; 92 virtual RecursionDecision visit(Markdown::Text::LinkNode const&) override; 93 94 HashTable<DeprecatedString> m_anchors; 95 Vector<FileLink> m_file_links; 96 bool m_has_invalid_link { false }; 97 98 DeprecatedString m_serenity_source_directory; 99}; 100 101MarkdownLinkage MarkdownLinkage::analyze(Markdown::Document const& document) 102{ 103 MarkdownLinkage linkage; 104 105 document.walk(linkage); 106 107 return linkage; 108} 109 110class StringCollector final : Markdown::Visitor { 111public: 112 StringCollector() = default; 113 virtual ~StringCollector() = default; 114 115 DeprecatedString build() { return m_builder.to_deprecated_string(); } 116 117 static DeprecatedString from(Markdown::Heading const& heading) 118 { 119 StringCollector collector; 120 heading.walk(collector); 121 return collector.build(); 122 } 123 124 static DeprecatedString from(Markdown::Text::Node const& node) 125 { 126 StringCollector collector; 127 node.walk(collector); 128 return collector.build(); 129 } 130 131private: 132 virtual RecursionDecision visit(DeprecatedString const& text) override 133 { 134 m_builder.append(text); 135 return RecursionDecision::Recurse; 136 } 137 138 StringBuilder m_builder; 139}; 140 141static DeprecatedString slugify(DeprecatedString const& text) 142{ 143 // TODO: This feels like it belongs into LibWeb. 144 DeprecatedString slug = text.to_lowercase(); 145 // Reverse-engineered through github, using: 146 // find AK/ Base/ Documentation/ Kernel/ Meta/ Ports/ Tests/ Userland/ -name '*.md' | xargs grep --color=always -Pin '^##+ .*[^a-z0-9 ?()`_:/!&|.$'"'"',<>"+-]' README.md 147 slug = slug.replace(" "sv, "-"sv, ReplaceMode::All) 148 .replace("!"sv, ""sv, ReplaceMode::All) 149 .replace("?"sv, ""sv, ReplaceMode::All) 150 .replace("("sv, ""sv, ReplaceMode::All) 151 .replace(")"sv, ""sv, ReplaceMode::All) 152 .replace(":"sv, ""sv, ReplaceMode::All) 153 .replace("/"sv, "-"sv, ReplaceMode::All) 154 .replace("&"sv, ""sv, ReplaceMode::All) 155 .replace("|"sv, ""sv, ReplaceMode::All) 156 .replace("."sv, ""sv, ReplaceMode::All) 157 .replace("$"sv, ""sv, ReplaceMode::All) 158 .replace("'"sv, ""sv, ReplaceMode::All) 159 .replace(","sv, ""sv, ReplaceMode::All) 160 .replace("\""sv, ""sv, ReplaceMode::All) 161 .replace("+"sv, ""sv, ReplaceMode::All) 162 .replace("\\"sv, ""sv, ReplaceMode::All) 163 .replace("<"sv, ""sv, ReplaceMode::All) 164 .replace(">"sv, ""sv, ReplaceMode::All); 165 // What about "="? 166 return slug; 167} 168 169RecursionDecision MarkdownLinkage::visit(Markdown::Heading const& heading) 170{ 171 m_anchors.set(slugify(StringCollector::from(heading))); 172 return RecursionDecision::Recurse; 173} 174 175RecursionDecision MarkdownLinkage::visit(Markdown::Text::LinkNode const& link_node) 176{ 177 DeprecatedString const& href = link_node.href; 178 if (href.is_null()) { 179 // Nothing to do here. 180 return RecursionDecision::Recurse; 181 } 182 auto url = URL::create_with_url_or_path(href); 183 if (url.is_valid()) { 184 if (url.scheme() == "https" || url.scheme() == "http") { 185 outln("Not checking external link {}", href); 186 return RecursionDecision::Recurse; 187 } 188 if (url.scheme() == "help") { 189 if (url.host() != "man") { 190 warnln("help:// URL without 'man': {}", href); 191 m_has_invalid_link = true; 192 return RecursionDecision::Recurse; 193 } 194 if (url.paths().size() < 2) { 195 warnln("help://man URL is missing section or page: {}", href); 196 m_has_invalid_link = true; 197 return RecursionDecision::Recurse; 198 } 199 200 // Remove leading '/' from the path. 201 auto file = DeprecatedString::formatted("{}/Base/usr/share/man/man{}.md", m_serenity_source_directory, url.path().substring(1)); 202 203 m_file_links.append({ file, DeprecatedString(), StringCollector::from(*link_node.text) }); 204 return RecursionDecision::Recurse; 205 } 206 if (url.scheme() == "file") { 207 if (url.path().contains("man"sv) && url.path().ends_with(".md"sv)) { 208 warnln("Inter-manpage link without the help:// scheme: {}\nPlease use help URLs of the form 'help://man/<section>/<subsection...>/<page>'", href); 209 m_has_invalid_link = true; 210 return RecursionDecision::Recurse; 211 } 212 // TODO: Check more possible links other than icons. 213 if (url.path().starts_with("/res/icons/"sv)) { 214 auto file = DeprecatedString::formatted("{}/Base{}", m_serenity_source_directory, url.path()); 215 m_file_links.append({ file, DeprecatedString(), StringCollector::from(*link_node.text) }); 216 } else if (url.path().starts_with("/bin"sv)) { 217 StringBuilder builder; 218 link_node.text->render_to_html(builder); 219 auto link_text = builder.string_view(); 220 if (link_text != "Open"sv) { 221 warnln("Binary link named '{}' is not allowed, binary links must be called 'Open'. Linked binary: {}", link_text, href); 222 m_has_invalid_link = true; 223 } 224 } else { 225 outln("Not checking local link {}", href); 226 } 227 return RecursionDecision::Recurse; 228 } 229 } 230 231 DeprecatedString label = StringCollector::from(*link_node.text); 232 Optional<size_t> last_hash = href.find_last('#'); 233 if (last_hash.has_value()) { 234 m_file_links.append({ href.substring(0, last_hash.value()), href.substring(last_hash.value() + 1), label }); 235 } else { 236 m_file_links.append({ href, DeprecatedString(), label }); 237 } 238 239 return RecursionDecision::Recurse; 240} 241 242ErrorOr<int> serenity_main(Main::Arguments arguments) 243{ 244 Core::ArgsParser args_parser; 245 Vector<StringView> file_paths; 246 args_parser.add_positional_argument(file_paths, "Path to markdown files to read and parse", "paths", Core::ArgsParser::Required::Yes); 247 args_parser.parse(arguments); 248 249 outln("Reading and parsing Markdown files ..."); 250 HashMap<DeprecatedString, MarkdownLinkage> files; 251 for (auto path : file_paths) { 252 auto file_or_error = Core::File::open(path, Core::File::OpenMode::Read); 253 if (file_or_error.is_error()) { 254 warnln("Failed to open {}: {}", path, file_or_error.error()); 255 // Since this should never happen anyway, fail early. 256 return file_or_error.release_error(); 257 } 258 auto file = file_or_error.release_value(); 259 260 auto content_buffer_or_error = file->read_until_eof(); 261 if (content_buffer_or_error.is_error()) { 262 warnln("Failed to read {}: {}", path, file_or_error.error()); 263 // Since this should never happen anyway, fail early. 264 return file_or_error.release_error(); 265 } 266 auto content_buffer = content_buffer_or_error.release_value(); 267 268 auto content = StringView(content_buffer); 269 auto document = Markdown::Document::parse(content); 270 if (!document) { 271 warnln("Failed to parse {} due to an unspecified error.", path); 272 // Since this should never happen anyway, fail early. 273 return 1; 274 } 275 files.set(Core::DeprecatedFile::real_path_for(path), MarkdownLinkage::analyze(*document)); 276 } 277 278 outln("Checking links ..."); 279 bool any_problems = false; 280 for (auto const& file_item : files) { 281 if (file_item.value.has_invalid_link()) { 282 outln("File '{}' has invalid links.", file_item.key); 283 any_problems = true; 284 continue; 285 } 286 287 auto file_lexical_path = LexicalPath(file_item.key); 288 auto file_dir = file_lexical_path.dirname(); 289 for (auto const& file_link : file_item.value.file_links()) { 290 DeprecatedString pointee_file; 291 if (file_link.file_path.is_empty()) { 292 pointee_file = file_item.key; 293 } else { 294 pointee_file = LexicalPath::absolute_path(file_dir, file_link.file_path); 295 } 296 if (!Core::DeprecatedFile::exists(pointee_file) && !is_missing_file_acceptable(pointee_file)) { 297 outln("File '{}' points to '{}' (label '{}'), but '{}' does not exist!", 298 file_item.key, file_link.file_path, file_link.label, pointee_file); 299 any_problems = true; 300 continue; 301 } 302 if (file_link.anchor.is_empty()) { 303 // No anchor to test for. 304 continue; 305 } 306 307 auto pointee_linkage = files.find(pointee_file); 308 if (pointee_linkage == files.end()) { 309 outln("File '{}' points to file '{}', which exists, but was not scanned. Add it to the command-line arguments and re-run.", 310 file_item.key, pointee_file); 311 any_problems = true; 312 continue; 313 } 314 315 if (!pointee_linkage->value.has_anchor(file_link.anchor)) { 316 outln("File '{}' points to '{}#{}' (label '{}'), but file '{}' does not have any heading that results in the anchor '{}'.", 317 file_item.key, file_link.file_path, file_link.anchor, file_link.label, pointee_file, file_link.anchor); 318 out(" The following anchors seem to be available:\n "); 319 bool any_anchors = false; 320 for (auto const& anchor : pointee_linkage->value.anchors()) { 321 if (any_anchors) 322 out(", "); 323 out("'{}'", anchor); 324 any_anchors = true; 325 } 326 if (!any_anchors) 327 out("(none)"); 328 outln(); 329 any_problems = true; 330 } 331 } 332 } 333 334 if (any_problems) { 335 outln("Done. Some errors were encountered, please check above log."); 336 return 1; 337 } else { 338 outln("Done. No problems detected."); 339 return 0; 340 } 341}