Serenity Operating System
1/*
2 * Copyright (c) 2021, Ben Wiederhake <BenWiederhake.GitHub@gmx.de>
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 */
6
7/*
8 * You may want to invoke the checker like this:
9 * $ cd Build/lagom
10 * $ ninja
11 * $ find ../../AK ../../Base ../../Documentation/ ../../Kernel/ ../../Meta/ ../../Ports/ ../../Tests/ ../../Userland/ -type f -name '*.md' | xargs ./markdown-check ../../README.md
12 */
13
14#include <AK/Format.h>
15#include <AK/HashMap.h>
16#include <AK/HashTable.h>
17#include <AK/LexicalPath.h>
18#include <AK/RecursionDecision.h>
19#include <AK/URL.h>
20#include <AK/Vector.h>
21#include <LibCore/ArgsParser.h>
22#include <LibCore/DeprecatedFile.h>
23#include <LibCore/File.h>
24#include <LibMain/Main.h>
25#include <LibMarkdown/Document.h>
26#include <LibMarkdown/Visitor.h>
27#include <stdlib.h>
28
29static bool is_missing_file_acceptable(DeprecatedString const& filename)
30{
31 const StringView acceptable_missing_files[] = {
32 // FIXME: Please write these manpages!
33 "/usr/share/man/man2/accept.md"sv,
34 "/usr/share/man/man2/exec.md"sv,
35 "/usr/share/man/man2/fcntl.md"sv,
36 "/usr/share/man/man2/fork.md"sv,
37 "/usr/share/man/man2/ioctl.md"sv,
38 "/usr/share/man/man2/listen.md"sv,
39 "/usr/share/man/man2/mmap.md"sv,
40 "/usr/share/man/man2/mprotect.md"sv,
41 "/usr/share/man/man2/open.md"sv,
42 "/usr/share/man/man2/ptrace.md"sv,
43 "/usr/share/man/man5/perfcore.md"sv,
44 // These ones are okay:
45 "/home/anon/Tests/js-tests/test-common.js"sv,
46 "/man1/index.html"sv,
47 "/man2/index.html"sv,
48 "/man3/index.html"sv,
49 "/man4/index.html"sv,
50 "/man5/index.html"sv,
51 "/man6/index.html"sv,
52 "/man7/index.html"sv,
53 "/man8/index.html"sv,
54 "index.html"sv,
55 };
56 for (auto const& suffix : acceptable_missing_files) {
57 if (filename.ends_with(suffix))
58 return true;
59 }
60 return false;
61}
62
63struct FileLink {
64 DeprecatedString file_path; // May be empty, but not null
65 DeprecatedString anchor; // May be null ("foo.md", "bar.png"), may be empty ("baz.md#")
66 DeprecatedString label; // May be empty, but not null
67};
68
69class MarkdownLinkage final : Markdown::Visitor {
70public:
71 ~MarkdownLinkage() = default;
72
73 static MarkdownLinkage analyze(Markdown::Document const&);
74
75 bool has_anchor(DeprecatedString const& anchor) const { return m_anchors.contains(anchor); }
76 HashTable<DeprecatedString> const& anchors() const { return m_anchors; }
77 bool has_invalid_link() const { return m_has_invalid_link; }
78 Vector<FileLink> const& file_links() const { return m_file_links; }
79
80private:
81 MarkdownLinkage()
82 {
83 auto const* source_directory = getenv("SERENITY_SOURCE_DIR");
84 if (source_directory != nullptr) {
85 m_serenity_source_directory = source_directory;
86 } else {
87 warnln("The environment variable SERENITY_SOURCE_DIR was not found. Link checking inside Serenity's filesystem will fail.");
88 }
89 }
90
91 virtual RecursionDecision visit(Markdown::Heading const&) override;
92 virtual RecursionDecision visit(Markdown::Text::LinkNode const&) override;
93
94 HashTable<DeprecatedString> m_anchors;
95 Vector<FileLink> m_file_links;
96 bool m_has_invalid_link { false };
97
98 DeprecatedString m_serenity_source_directory;
99};
100
101MarkdownLinkage MarkdownLinkage::analyze(Markdown::Document const& document)
102{
103 MarkdownLinkage linkage;
104
105 document.walk(linkage);
106
107 return linkage;
108}
109
110class StringCollector final : Markdown::Visitor {
111public:
112 StringCollector() = default;
113 virtual ~StringCollector() = default;
114
115 DeprecatedString build() { return m_builder.to_deprecated_string(); }
116
117 static DeprecatedString from(Markdown::Heading const& heading)
118 {
119 StringCollector collector;
120 heading.walk(collector);
121 return collector.build();
122 }
123
124 static DeprecatedString from(Markdown::Text::Node const& node)
125 {
126 StringCollector collector;
127 node.walk(collector);
128 return collector.build();
129 }
130
131private:
132 virtual RecursionDecision visit(DeprecatedString const& text) override
133 {
134 m_builder.append(text);
135 return RecursionDecision::Recurse;
136 }
137
138 StringBuilder m_builder;
139};
140
141static DeprecatedString slugify(DeprecatedString const& text)
142{
143 // TODO: This feels like it belongs into LibWeb.
144 DeprecatedString slug = text.to_lowercase();
145 // Reverse-engineered through github, using:
146 // find AK/ Base/ Documentation/ Kernel/ Meta/ Ports/ Tests/ Userland/ -name '*.md' | xargs grep --color=always -Pin '^##+ .*[^a-z0-9 ?()`_:/!&|.$'"'"',<>"+-]' README.md
147 slug = slug.replace(" "sv, "-"sv, ReplaceMode::All)
148 .replace("!"sv, ""sv, ReplaceMode::All)
149 .replace("?"sv, ""sv, ReplaceMode::All)
150 .replace("("sv, ""sv, ReplaceMode::All)
151 .replace(")"sv, ""sv, ReplaceMode::All)
152 .replace(":"sv, ""sv, ReplaceMode::All)
153 .replace("/"sv, "-"sv, ReplaceMode::All)
154 .replace("&"sv, ""sv, ReplaceMode::All)
155 .replace("|"sv, ""sv, ReplaceMode::All)
156 .replace("."sv, ""sv, ReplaceMode::All)
157 .replace("$"sv, ""sv, ReplaceMode::All)
158 .replace("'"sv, ""sv, ReplaceMode::All)
159 .replace(","sv, ""sv, ReplaceMode::All)
160 .replace("\""sv, ""sv, ReplaceMode::All)
161 .replace("+"sv, ""sv, ReplaceMode::All)
162 .replace("\\"sv, ""sv, ReplaceMode::All)
163 .replace("<"sv, ""sv, ReplaceMode::All)
164 .replace(">"sv, ""sv, ReplaceMode::All);
165 // What about "="?
166 return slug;
167}
168
169RecursionDecision MarkdownLinkage::visit(Markdown::Heading const& heading)
170{
171 m_anchors.set(slugify(StringCollector::from(heading)));
172 return RecursionDecision::Recurse;
173}
174
175RecursionDecision MarkdownLinkage::visit(Markdown::Text::LinkNode const& link_node)
176{
177 DeprecatedString const& href = link_node.href;
178 if (href.is_null()) {
179 // Nothing to do here.
180 return RecursionDecision::Recurse;
181 }
182 auto url = URL::create_with_url_or_path(href);
183 if (url.is_valid()) {
184 if (url.scheme() == "https" || url.scheme() == "http") {
185 outln("Not checking external link {}", href);
186 return RecursionDecision::Recurse;
187 }
188 if (url.scheme() == "help") {
189 if (url.host() != "man") {
190 warnln("help:// URL without 'man': {}", href);
191 m_has_invalid_link = true;
192 return RecursionDecision::Recurse;
193 }
194 if (url.paths().size() < 2) {
195 warnln("help://man URL is missing section or page: {}", href);
196 m_has_invalid_link = true;
197 return RecursionDecision::Recurse;
198 }
199
200 // Remove leading '/' from the path.
201 auto file = DeprecatedString::formatted("{}/Base/usr/share/man/man{}.md", m_serenity_source_directory, url.path().substring(1));
202
203 m_file_links.append({ file, DeprecatedString(), StringCollector::from(*link_node.text) });
204 return RecursionDecision::Recurse;
205 }
206 if (url.scheme() == "file") {
207 if (url.path().contains("man"sv) && url.path().ends_with(".md"sv)) {
208 warnln("Inter-manpage link without the help:// scheme: {}\nPlease use help URLs of the form 'help://man/<section>/<subsection...>/<page>'", href);
209 m_has_invalid_link = true;
210 return RecursionDecision::Recurse;
211 }
212 // TODO: Check more possible links other than icons.
213 if (url.path().starts_with("/res/icons/"sv)) {
214 auto file = DeprecatedString::formatted("{}/Base{}", m_serenity_source_directory, url.path());
215 m_file_links.append({ file, DeprecatedString(), StringCollector::from(*link_node.text) });
216 } else if (url.path().starts_with("/bin"sv)) {
217 StringBuilder builder;
218 link_node.text->render_to_html(builder);
219 auto link_text = builder.string_view();
220 if (link_text != "Open"sv) {
221 warnln("Binary link named '{}' is not allowed, binary links must be called 'Open'. Linked binary: {}", link_text, href);
222 m_has_invalid_link = true;
223 }
224 } else {
225 outln("Not checking local link {}", href);
226 }
227 return RecursionDecision::Recurse;
228 }
229 }
230
231 DeprecatedString label = StringCollector::from(*link_node.text);
232 Optional<size_t> last_hash = href.find_last('#');
233 if (last_hash.has_value()) {
234 m_file_links.append({ href.substring(0, last_hash.value()), href.substring(last_hash.value() + 1), label });
235 } else {
236 m_file_links.append({ href, DeprecatedString(), label });
237 }
238
239 return RecursionDecision::Recurse;
240}
241
242ErrorOr<int> serenity_main(Main::Arguments arguments)
243{
244 Core::ArgsParser args_parser;
245 Vector<StringView> file_paths;
246 args_parser.add_positional_argument(file_paths, "Path to markdown files to read and parse", "paths", Core::ArgsParser::Required::Yes);
247 args_parser.parse(arguments);
248
249 outln("Reading and parsing Markdown files ...");
250 HashMap<DeprecatedString, MarkdownLinkage> files;
251 for (auto path : file_paths) {
252 auto file_or_error = Core::File::open(path, Core::File::OpenMode::Read);
253 if (file_or_error.is_error()) {
254 warnln("Failed to open {}: {}", path, file_or_error.error());
255 // Since this should never happen anyway, fail early.
256 return file_or_error.release_error();
257 }
258 auto file = file_or_error.release_value();
259
260 auto content_buffer_or_error = file->read_until_eof();
261 if (content_buffer_or_error.is_error()) {
262 warnln("Failed to read {}: {}", path, file_or_error.error());
263 // Since this should never happen anyway, fail early.
264 return file_or_error.release_error();
265 }
266 auto content_buffer = content_buffer_or_error.release_value();
267
268 auto content = StringView(content_buffer);
269 auto document = Markdown::Document::parse(content);
270 if (!document) {
271 warnln("Failed to parse {} due to an unspecified error.", path);
272 // Since this should never happen anyway, fail early.
273 return 1;
274 }
275 files.set(Core::DeprecatedFile::real_path_for(path), MarkdownLinkage::analyze(*document));
276 }
277
278 outln("Checking links ...");
279 bool any_problems = false;
280 for (auto const& file_item : files) {
281 if (file_item.value.has_invalid_link()) {
282 outln("File '{}' has invalid links.", file_item.key);
283 any_problems = true;
284 continue;
285 }
286
287 auto file_lexical_path = LexicalPath(file_item.key);
288 auto file_dir = file_lexical_path.dirname();
289 for (auto const& file_link : file_item.value.file_links()) {
290 DeprecatedString pointee_file;
291 if (file_link.file_path.is_empty()) {
292 pointee_file = file_item.key;
293 } else {
294 pointee_file = LexicalPath::absolute_path(file_dir, file_link.file_path);
295 }
296 if (!Core::DeprecatedFile::exists(pointee_file) && !is_missing_file_acceptable(pointee_file)) {
297 outln("File '{}' points to '{}' (label '{}'), but '{}' does not exist!",
298 file_item.key, file_link.file_path, file_link.label, pointee_file);
299 any_problems = true;
300 continue;
301 }
302 if (file_link.anchor.is_empty()) {
303 // No anchor to test for.
304 continue;
305 }
306
307 auto pointee_linkage = files.find(pointee_file);
308 if (pointee_linkage == files.end()) {
309 outln("File '{}' points to file '{}', which exists, but was not scanned. Add it to the command-line arguments and re-run.",
310 file_item.key, pointee_file);
311 any_problems = true;
312 continue;
313 }
314
315 if (!pointee_linkage->value.has_anchor(file_link.anchor)) {
316 outln("File '{}' points to '{}#{}' (label '{}'), but file '{}' does not have any heading that results in the anchor '{}'.",
317 file_item.key, file_link.file_path, file_link.anchor, file_link.label, pointee_file, file_link.anchor);
318 out(" The following anchors seem to be available:\n ");
319 bool any_anchors = false;
320 for (auto const& anchor : pointee_linkage->value.anchors()) {
321 if (any_anchors)
322 out(", ");
323 out("'{}'", anchor);
324 any_anchors = true;
325 }
326 if (!any_anchors)
327 out("(none)");
328 outln();
329 any_problems = true;
330 }
331 }
332 }
333
334 if (any_problems) {
335 outln("Done. Some errors were encountered, please check above log.");
336 return 1;
337 } else {
338 outln("Done. No problems detected.");
339 return 0;
340 }
341}