Serenity Operating System
1/*
2 * Copyright (c) 2022, Ali Mohammad Pur <mpfard@serenityos.org>
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 */
6
7#include <AK/LexicalPath.h>
8#include <AK/Queue.h>
9#include <AK/URL.h>
10#include <AK/URLParser.h>
11#include <LibCore/ArgsParser.h>
12#include <LibCore/DeprecatedFile.h>
13#include <LibCore/File.h>
14#include <LibMain/Main.h>
15#include <LibXML/DOM/Document.h>
16#include <LibXML/DOM/Node.h>
17#include <LibXML/Parser/Parser.h>
18
// Set by the --color/-c option: when true, color() emits ANSI escape sequences.
static bool g_color { false };
// Set by the --only-contents/-o option: when true, dump(XML::Document&) prints only the root element.
static bool g_only_contents { false };
21
// Syntactic categories that color() knows how to highlight.
enum class ColorRole {
    PITag,          // "<?" and "?>" delimiters
    PITarget,       // processing instruction target; also used for external ID literals
    PIData,         // processing instruction data
    AttributeName,  // attribute names
    Eq,             // the "=" between an attribute name and its value (currently uncolored)
    AttributeValue, // attribute values and entity replacement text
    Tag,            // element tags and declared names
    Text,           // character data (currently uncolored)
    Comment,        // "<!-- ... -->"
    Reset,          // return the terminal to its default attributes
    Doctype,        // "<!DOCTYPE", "<!ELEMENT", "<!ATTLIST", "<!ENTITY" and their closing ">"
    Keyword,        // DTD keywords such as EMPTY, CDATA, #REQUIRED, PUBLIC
};
36static void color(ColorRole role)
37{
38 if (!g_color)
39 return;
40
41 switch (role) {
42 case ColorRole::PITag:
43 case ColorRole::Doctype:
44 out("\x1b[{};{}m", 1, "38;5;223");
45 break;
46 case ColorRole::PITarget:
47 out("\x1b[{};{}m", 1, "38;5;23");
48 break;
49 case ColorRole::PIData:
50 out("\x1b[{};{}m", 1, "38;5;43");
51 break;
52 case ColorRole::AttributeName:
53 out("\x1b[38;5;27m");
54 break;
55 case ColorRole::Eq:
56 break;
57 case ColorRole::AttributeValue:
58 out("\x1b[38;5;46m");
59 break;
60 case ColorRole::Tag:
61 out("\x1b[{};{}m", 1, "38;5;220");
62 break;
63 case ColorRole::Text:
64 break;
65 case ColorRole::Comment:
66 out("\x1b[{};{}m", 3, "38;5;250");
67 break;
68 case ColorRole::Reset:
69 out("\x1b[0m");
70 break;
71 case ColorRole::Keyword:
72 out("\x1b[38;5;40m");
73 break;
74 }
75}
76
77static void dump(XML::Node const& node)
78{
79 node.content.visit(
80 [](XML::Node::Text const& text) {
81 out("{}", text.builder.string_view());
82 },
83 [](XML::Node::Comment const& comment) {
84 color(ColorRole::Comment);
85 out("<!--{}-->", comment.text);
86 color(ColorRole::Reset);
87 },
88 [](XML::Node::Element const& element) {
89 color(ColorRole::Tag);
90 out("<{}", element.name);
91 color(ColorRole::Reset);
92
93 if (!element.attributes.is_empty()) {
94 for (auto& attribute : element.attributes) {
95 auto quote = attribute.value.contains('"') ? '\'' : '"';
96 color(ColorRole::AttributeName);
97 out(" {}", attribute.key);
98 color(ColorRole::Eq);
99 out("=");
100 color(ColorRole::AttributeValue);
101 out("{}{}{}", quote, attribute.value, quote);
102 color(ColorRole::Reset);
103 }
104 }
105 if (element.children.is_empty()) {
106 color(ColorRole::Tag);
107 out("/>");
108 color(ColorRole::Reset);
109 } else {
110 color(ColorRole::Tag);
111 out(">");
112 color(ColorRole::Reset);
113
114 for (auto& node : element.children)
115 dump(*node);
116
117 color(ColorRole::Tag);
118 out("</{}>", element.name);
119 color(ColorRole::Reset);
120 }
121 });
122}
123
124static void dump(XML::Document& document)
125{
126 if (!g_only_contents) {
127 {
128 color(ColorRole::PITag);
129 out("<?");
130 color(ColorRole::Reset);
131 color(ColorRole::PITarget);
132 out("xml");
133 color(ColorRole::Reset);
134 color(ColorRole::PIData);
135 out(" version='{}'", document.version() == XML::Version::Version10 ? "1.0" : "1.1");
136 color(ColorRole::Reset);
137 color(ColorRole::PITag);
138 outln("?>");
139 }
140
141 for (auto& pi : document.processing_instructions()) {
142 color(ColorRole::PITag);
143 out("<?");
144 color(ColorRole::Reset);
145 color(ColorRole::PITarget);
146 out("{}", pi.key);
147 color(ColorRole::Reset);
148 if (!pi.value.is_empty()) {
149 color(ColorRole::PIData);
150 out(" {}", pi.value);
151 color(ColorRole::Reset);
152 }
153 color(ColorRole::PITag);
154 outln("?>");
155 }
156
157 if (auto maybe_doctype = document.doctype(); maybe_doctype.has_value()) {
158 auto& doctype = *maybe_doctype;
159 color(ColorRole::Doctype);
160 out("<!DOCTYPE ");
161 color(ColorRole::Tag);
162 out("{}", doctype.type);
163 if (!doctype.markup_declarations.is_empty()) {
164 color(ColorRole::Reset);
165 out(" [\n");
166 for (auto& entry : doctype.markup_declarations) {
167 entry.visit(
168 [&](XML::ElementDeclaration const& element) {
169 color(ColorRole::Doctype);
170 out(" <!ELEMENT ");
171 color(ColorRole::Tag);
172 out("{} ", element.type);
173 element.content_spec.visit(
174 [&](XML::ElementDeclaration::Empty const&) {
175 color(ColorRole::Keyword);
176 out("EMPTY");
177 },
178 [&](XML::ElementDeclaration::Any const&) {
179 color(ColorRole::Keyword);
180 out("ANY");
181 },
182 [&](XML::ElementDeclaration::Mixed const&) {
183 },
184 [&](XML::ElementDeclaration::Children const&) {
185 });
186 color(ColorRole::Doctype);
187 outln(">");
188 },
189 [&](XML::AttributeListDeclaration const& list) {
190 color(ColorRole::Doctype);
191 out(" <!ATTLIST ");
192 color(ColorRole::Tag);
193 out("{}", list.type);
194 for (auto& attribute : list.attributes) {
195 color(ColorRole::AttributeName);
196 out(" {} ", attribute.name);
197 color(ColorRole::Keyword);
198 attribute.type.visit(
199 [](XML::AttributeListDeclaration::StringType) {
200 out("CDATA");
201 },
202 [](XML::AttributeListDeclaration::TokenizedType type) {
203 switch (type) {
204 case XML::AttributeListDeclaration::TokenizedType::ID:
205 out("ID");
206 break;
207 case XML::AttributeListDeclaration::TokenizedType::IDRef:
208 out("IDREF");
209 break;
210 case XML::AttributeListDeclaration::TokenizedType::IDRefs:
211 out("IDREFS");
212 break;
213 case XML::AttributeListDeclaration::TokenizedType::Entity:
214 out("ENTITY");
215 break;
216 case XML::AttributeListDeclaration::TokenizedType::Entities:
217 out("ENTITIES");
218 break;
219 case XML::AttributeListDeclaration::TokenizedType::NMToken:
220 out("NMTOKEN");
221 break;
222 case XML::AttributeListDeclaration::TokenizedType::NMTokens:
223 out("NMTOKENS");
224 break;
225 }
226 },
227 [](XML::AttributeListDeclaration::NotationType const& type) {
228 out("NOTATION ");
229 color(ColorRole::Reset);
230 out("( ");
231 bool first = true;
232 for (auto& name : type.names) {
233 color(ColorRole::Reset);
234 if (first)
235 first = false;
236 else
237 out(" | ");
238 color(ColorRole::AttributeValue);
239 out("{}", name);
240 }
241 color(ColorRole::Reset);
242 out(" )");
243 },
244 [](XML::AttributeListDeclaration::Enumeration const& type) {
245 color(ColorRole::Reset);
246 out("( ");
247 bool first = true;
248 for (auto& name : type.tokens) {
249 color(ColorRole::Reset);
250 if (first)
251 first = false;
252 else
253 out(" | ");
254 color(ColorRole::AttributeValue);
255 out("{}", name);
256 }
257 color(ColorRole::Reset);
258 out(" )");
259 });
260 out(" ");
261 attribute.default_.visit(
262 [](XML::AttributeListDeclaration::Required) {
263 color(ColorRole::Keyword);
264 out("#REQUIRED");
265 },
266 [](XML::AttributeListDeclaration::Implied) {
267 color(ColorRole::Keyword);
268 out("#IMPLIED");
269 },
270 [](XML::AttributeListDeclaration::Fixed const& fixed) {
271 color(ColorRole::Keyword);
272 out("#FIXED ");
273 color(ColorRole::AttributeValue);
274 out("\"{}\"", fixed.value);
275 },
276 [](XML::AttributeListDeclaration::DefaultValue const& default_) {
277 color(ColorRole::AttributeValue);
278 out("\"{}\"", default_.value);
279 });
280 }
281 color(ColorRole::Doctype);
282 outln(">");
283 },
284 [&](XML::EntityDeclaration const& entity) {
285 color(ColorRole::Doctype);
286 out(" <!ENTITY ");
287 entity.visit(
288 [](XML::GEDeclaration const& declaration) {
289 color(ColorRole::Tag);
290 out("{} ", declaration.name);
291 declaration.definition.visit(
292 [](DeprecatedString const& value) {
293 color(ColorRole::AttributeValue);
294 out("\"{}\"", value);
295 },
296 [](XML::EntityDefinition const& definition) {
297 if (definition.id.public_id.has_value()) {
298 color(ColorRole::Keyword);
299 out("PUBLIC ");
300 color(ColorRole::PITarget);
301 out("\"{}\" ", definition.id.public_id->public_literal);
302 } else {
303 color(ColorRole::Keyword);
304 out("SYSTEM ");
305 }
306 color(ColorRole::PITarget);
307 out("\"{}\" ", definition.id.system_id.system_literal);
308
309 if (definition.notation.has_value()) {
310 color(ColorRole::Keyword);
311 out(" NDATA ");
312 color(ColorRole::PITarget);
313 out("{}", *definition.notation);
314 }
315 });
316 color(ColorRole::Tag);
317 outln(">");
318 },
319 [](XML::PEDeclaration const& declaration) {
320 color(ColorRole::Tag);
321 out("{} ", declaration.name);
322 declaration.definition.visit(
323 [](DeprecatedString const& value) {
324 color(ColorRole::AttributeValue);
325 out("\"{}\"", value);
326 },
327 [](XML::ExternalID const& id) {
328 if (id.public_id.has_value()) {
329 color(ColorRole::Keyword);
330 out("PUBLIC ");
331 color(ColorRole::PITarget);
332 out("\"{}\" ", id.public_id->public_literal);
333 } else {
334 color(ColorRole::Keyword);
335 out("SYSTEM ");
336 }
337 color(ColorRole::PITarget);
338 out("\"{}\"", id.system_id.system_literal);
339 });
340 color(ColorRole::Tag);
341 outln(">");
342 });
343 },
344 [&](XML::NotationDeclaration const&) {
345
346 });
347 }
348 color(ColorRole::Reset);
349 out("]");
350 }
351 color(ColorRole::Doctype);
352 outln(">");
353 }
354 }
355 dump(document.root());
356}
357
358static DeprecatedString s_path;
359static auto parse(StringView contents)
360{
361 return XML::Parser {
362 contents,
363 {
364 .preserve_comments = true,
365 .resolve_external_resource = [&](XML::SystemID const& system_id, Optional<XML::PublicID> const&) -> ErrorOr<DeprecatedString> {
366 auto base = URL::create_with_file_scheme(s_path);
367 auto url = URLParser::parse(system_id.system_literal, &base);
368 if (!url.is_valid())
369 return Error::from_string_literal("Invalid URL");
370
371 if (url.scheme() != "file")
372 return Error::from_string_literal("NYI: Nonlocal entity");
373
374 auto file = TRY(Core::File::open(url.path(), Core::File::OpenMode::Read));
375 return DeprecatedString::copy(TRY(file->read_until_eof()));
376 },
377 },
378 };
379}
380
// Outcome recorded for each test by do_run_tests().
enum class TestResult {
    Passed,       // the parser behaved as the test's TYPE demands
    Failed,       // the parser accepted/rejected the input contrary to TYPE, or the output differed
    RunnerFailed, // the test could not be run (bad path, unreadable file, unparsable OUTPUT file)
};
386static HashMap<DeprecatedString, TestResult> s_test_results {};
387static void do_run_tests(XML::Document& document)
388{
389 auto& root = document.root().content.get<XML::Node::Element>();
390 VERIFY(root.name == "TESTSUITE");
391 Queue<XML::Node*> suites;
392 auto dump_cases = [&](auto& root) {
393 for (auto& node : root.children) {
394 auto element = node->content.template get_pointer<XML::Node::Element>();
395 if (!element)
396 continue;
397 if (element->name != "TESTCASES" && element->name != "TEST")
398 continue;
399 suites.enqueue(node);
400 }
401 };
402
403 dump_cases(root);
404
405 auto base_path = LexicalPath::dirname(s_path);
406
407 while (!suites.is_empty()) {
408 auto& node = *suites.dequeue();
409 auto& suite = node.content.get<XML::Node::Element>();
410 if (suite.name == "TESTCASES") {
411 dump_cases(suite);
412 continue;
413 }
414 if (suite.name == "TEST") {
415 Vector<StringView> bases;
416 for (auto* parent = node.parent; parent; parent = parent->parent) {
417 auto& attributes = parent->content.get<XML::Node::Element>().attributes;
418 auto it = attributes.find("xml:base");
419 if (it == attributes.end())
420 continue;
421 bases.append(it->value);
422 }
423
424 auto type = suite.attributes.find("TYPE")->value;
425
426 StringBuilder path_builder;
427 path_builder.append(base_path);
428 path_builder.append('/');
429 for (auto& entry : bases.in_reverse()) {
430 path_builder.append(entry);
431 path_builder.append('/');
432 }
433 auto test_base_path = path_builder.to_deprecated_string();
434
435 path_builder.append(suite.attributes.find("URI")->value);
436 auto url = URL::create_with_file_scheme(path_builder.string_view());
437 if (!url.is_valid()) {
438 warnln("Invalid URL {}", path_builder.string_view());
439 s_test_results.set(path_builder.string_view(), TestResult::RunnerFailed);
440 continue;
441 }
442
443 auto file_result = Core::File::open(url.path(), Core::File::OpenMode::Read);
444 if (file_result.is_error()) {
445 warnln("Read error for {}: {}", url.path(), file_result.error());
446 s_test_results.set(url.path(), TestResult::RunnerFailed);
447 continue;
448 }
449
450 warnln("Running test {}", url.path());
451
452 auto contents = file_result.value()->read_until_eof();
453 if (contents.is_error()) {
454 warnln("Read error for {}: {}", url.path(), contents.error());
455 s_test_results.set(url.path(), TestResult::RunnerFailed);
456 continue;
457 }
458 auto parser = parse(contents.value());
459 auto doc_or_error = parser.parse();
460 if (doc_or_error.is_error()) {
461 if (type == "invalid" || type == "error" || type == "not-wf")
462 s_test_results.set(url.path(), TestResult::Passed);
463 else
464 s_test_results.set(url.path(), TestResult::Failed);
465 continue;
466 }
467
468 auto out = suite.attributes.find("OUTPUT");
469 if (out != suite.attributes.end()) {
470 auto out_path = LexicalPath::join(test_base_path, out->value).string();
471 auto file_result = Core::File::open(out_path, Core::File::OpenMode::Read);
472 if (file_result.is_error()) {
473 warnln("Read error for {}: {}", out_path, file_result.error());
474 s_test_results.set(url.path(), TestResult::RunnerFailed);
475 continue;
476 }
477 auto contents = file_result.value()->read_until_eof();
478 if (contents.is_error()) {
479 warnln("Read error for {}: {}", out_path, contents.error());
480 s_test_results.set(url.path(), TestResult::RunnerFailed);
481 continue;
482 }
483 auto parser = parse(contents.value());
484 auto out_doc_or_error = parser.parse();
485 if (out_doc_or_error.is_error()) {
486 warnln("Parse error for {}: {}", out_path, out_doc_or_error.error());
487 s_test_results.set(url.path(), TestResult::RunnerFailed);
488 continue;
489 }
490 auto out_doc = out_doc_or_error.release_value();
491 if (out_doc.root() != doc_or_error.value().root()) {
492 s_test_results.set(url.path(), TestResult::Failed);
493 continue;
494 }
495 }
496
497 if (type == "invalid" || type == "error" || type == "not-wf")
498 s_test_results.set(url.path(), TestResult::Failed);
499 else
500 s_test_results.set(url.path(), TestResult::Passed);
501 }
502 }
503}
504
505ErrorOr<int> serenity_main(Main::Arguments arguments)
506{
507 StringView filename;
508 bool run_tests { false };
509
510 Core::ArgsParser parser;
511 parser.set_general_help("Parse and dump XML files");
512 parser.add_option(g_color, "Syntax highlight the output", "color", 'c');
513 parser.add_option(g_only_contents, "Only display markup and text", "only-contents", 'o');
514 parser.add_option(run_tests, "Run tests", "run-tests", 't');
515 parser.add_positional_argument(filename, "File to read from", "file");
516 parser.parse(arguments);
517
518 s_path = Core::DeprecatedFile::real_path_for(filename);
519 auto file = TRY(Core::File::open(s_path, Core::File::OpenMode::Read));
520 auto contents = TRY(file->read_until_eof());
521
522 auto xml_parser = parse(contents);
523 auto result = xml_parser.parse();
524 if (result.is_error()) {
525 if (xml_parser.parse_error_causes().is_empty()) {
526 warnln("{}", result.error());
527 } else {
528 warnln("{}; caused by:", result.error());
529 for (auto const& cause : xml_parser.parse_error_causes())
530 warnln(" {}", cause);
531 }
532 return 1;
533 }
534
535 auto doc = result.release_value();
536 if (run_tests) {
537 do_run_tests(doc);
538 size_t passed = 0;
539 size_t failed = 0;
540 size_t runner_error = 0;
541 size_t total = 0;
542 for (auto& entry : s_test_results) {
543 total++;
544 switch (entry.value) {
545 case TestResult::Passed:
546 passed++;
547 break;
548 case TestResult::Failed:
549 failed++;
550 break;
551 case TestResult::RunnerFailed:
552 runner_error++;
553 break;
554 }
555 }
556 outln("{} passed, {} failed, {} runner failed of {} tests run.", passed, failed, runner_error, total);
557 return 0;
558 }
559
560 dump(doc);
561 if (!g_only_contents)
562 outln();
563
564 return 0;
565}