//! Markdown parser fork with extended syntax for personal use.
1use markdown::{
2 mdast::{Html, Node, Paragraph, Root, Text},
3 message, to_html, to_html_with_options, to_mdast,
4 unist::Position,
5 CompileOptions, Constructs, Options, ParseOptions,
6};
7use pretty_assertions::assert_eq;
8
/// Checks how inline ("text") HTML in Markdown is compiled to HTML and to mdast.
///
/// Most assertions run with `danger`, an `Options` value that turns on
/// `allow_dangerous_html` and `allow_dangerous_protocol`. Under those options a
/// snippet that is accepted as HTML is expected to appear verbatim in the
/// output, while a snippet that is *not* accepted is expected to appear as
/// plain text, i.e. with `<`, `>` and `"` written as `&lt;`, `&gt;` and
/// `&quot;`. The "should not support …" cases therefore all expect
/// entity-encoded output.
///
/// # Errors
///
/// Propagates any `message::Message` returned by `to_html_with_options` or
/// `to_mdast` through `?`, which fails the test.
#[test]
fn html_text() -> Result<(), message::Message> {
    // Options that let raw HTML through instead of encoding it.
    let danger = Options {
        compile: CompileOptions {
            allow_dangerous_html: true,
            allow_dangerous_protocol: true,
            ..Default::default()
        },
        ..Default::default()
    };

    // Default options: HTML is recognized but must be encoded in the output.
    assert_eq!(
        to_html("a <b> c"),
        "<p>a &lt;b&gt; c</p>",
        "should encode dangerous html by default"
    );

    assert_eq!(
        to_html_with_options("<a><bab><c2c>", &danger)?,
        "<p><a><bab><c2c></p>",
        "should support opening tags"
    );

    assert_eq!(
        to_html_with_options("<a/><b2/>", &danger)?,
        "<p><a/><b2/></p>",
        "should support self-closing tags"
    );

    assert_eq!(
        to_html_with_options("<a /><b2\ndata=\"foo\" >", &danger)?,
        "<p><a /><b2\ndata=\"foo\" ></p>",
        "should support whitespace in tags"
    );

    assert_eq!(
        to_html_with_options(
            "<a foo=\"bar\" bam = 'baz <em>\"</em>'\n_boolean zoop:33=zoop:33 />",
            &danger
        )?,
        "<p><a foo=\"bar\" bam = 'baz <em>\"</em>'\n_boolean zoop:33=zoop:33 /></p>",
        "should support attributes on tags"
    );

    assert_eq!(
        to_html_with_options("Foo <responsive-image src=\"foo.jpg\" />", &danger)?,
        "<p>Foo <responsive-image src=\"foo.jpg\" /></p>",
        "should support non-html tags"
    );

    assert_eq!(
        to_html_with_options("<33> <__>", &danger)?,
        "<p>&lt;33&gt; &lt;__&gt;</p>",
        "should not support nonconforming tag names"
    );

    assert_eq!(
        to_html_with_options("<a h*#ref=\"hi\">", &danger)?,
        "<p>&lt;a h*#ref=&quot;hi&quot;&gt;</p>",
        "should not support nonconforming attribute names"
    );

    assert_eq!(
        to_html_with_options("<a href=\"hi'> <a href=hi'>", &danger)?,
        "<p>&lt;a href=&quot;hi'&gt; &lt;a href=hi'&gt;</p>",
        "should not support nonconforming attribute values"
    );

    assert_eq!(
        to_html_with_options("< a><\nfoo><bar/ >\n<foo bar=baz\nbim!bop />", &danger)?,
        "<p>&lt; a&gt;&lt;\nfoo&gt;&lt;bar/ &gt;\n&lt;foo bar=baz\nbim!bop /&gt;</p>",
        "should not support nonconforming whitespace"
    );

    assert_eq!(
        to_html_with_options("<a href='bar'title=title>", &danger)?,
        "<p>&lt;a href='bar'title=title&gt;</p>",
        "should not support missing whitespace"
    );

    assert_eq!(
        to_html_with_options("</a></foo >", &danger)?,
        "<p></a></foo ></p>",
        "should support closing tags"
    );

    assert_eq!(
        to_html_with_options("</a href=\"foo\">", &danger)?,
        "<p>&lt;/a href=&quot;foo&quot;&gt;</p>",
        "should not support closing tags w/ attributes"
    );

    assert_eq!(
        to_html_with_options("foo <!-- this is a\ncomment - with hyphen -->", &danger)?,
        "<p>foo <!-- this is a\ncomment - with hyphen --></p>",
        "should support comments"
    );

    assert_eq!(
        to_html_with_options("foo <!-- not a comment -- two hyphens -->", &danger)?,
        "<p>foo <!-- not a comment -- two hyphens --></p>",
        "should support comments w/ two dashes inside"
    );

    // `<!-->` is a complete comment on its own, so the trailing ` foo -->` is
    // text and its `>` is encoded.
    assert_eq!(
        to_html_with_options("foo <!--> foo -->", &danger)?,
        "<p>foo <!--> foo --&gt;</p>",
        "should support nonconforming comments (1)"
    );

    assert_eq!(
        to_html_with_options("foo <!-- foo--->", &danger)?,
        "<p>foo <!-- foo---></p>",
        "should support nonconforming comments (2)"
    );

    assert_eq!(
        to_html_with_options("foo <?php echo $a; ?>", &danger)?,
        "<p>foo <?php echo $a; ?></p>",
        "should support instructions"
    );

    assert_eq!(
        to_html_with_options("foo <!ELEMENT br EMPTY>", &danger)?,
        "<p>foo <!ELEMENT br EMPTY></p>",
        "should support declarations"
    );

    assert_eq!(
        to_html_with_options("foo <![CDATA[>&<]]>", &danger)?,
        "<p>foo <![CDATA[>&<]]></p>",
        "should support cdata"
    );

    // The character reference must stay untouched inside the raw HTML.
    assert_eq!(
        to_html_with_options("foo <a href=\"&ouml;\">", &danger)?,
        "<p>foo <a href=\"&ouml;\"></p>",
        "should support (ignore) character references"
    );

    assert_eq!(
        to_html_with_options("foo <a href=\"\\*\">", &danger)?,
        "<p>foo <a href=\"\\*\"></p>",
        "should not support character escapes (1)"
    );

    // Not valid HTML, so it is text: the `\"` escape yields a plain quote and
    // all three quotes end up encoded.
    assert_eq!(
        to_html_with_options("<a href=\"\\\"\">", &danger)?,
        "<p>&lt;a href=&quot;&quot;&quot;&gt;</p>",
        "should not support character escapes (2)"
    );

    // Extra:
    assert_eq!(
        to_html_with_options("foo <!1>", &danger)?,
        "<p>foo &lt;!1&gt;</p>",
        "should not support non-comment, non-cdata, and non-named declaration"
    );

    assert_eq!(
        to_html_with_options("foo <!-not enough!-->", &danger)?,
        "<p>foo &lt;!-not enough!--&gt;</p>",
        "should not support comments w/ not enough dashes"
    );

    assert_eq!(
        to_html_with_options("foo <!---ok-->", &danger)?,
        "<p>foo <!---ok--></p>",
        "should support comments that start w/ a dash, if it’s not followed by a greater than"
    );

    assert_eq!(
        to_html_with_options("foo <!--->", &danger)?,
        "<p>foo <!---></p>",
        "should support comments that start w/ `->`"
    );

    assert_eq!(
        to_html_with_options("foo <!-- -> -->", &danger)?,
        "<p>foo <!-- -> --></p>",
        "should support `->` in a comment"
    );

    assert_eq!(
        to_html_with_options("foo <!--", &danger)?,
        "<p>foo &lt;!--</p>",
        "should not support eof in a comment (1)"
    );

    assert_eq!(
        to_html_with_options("foo <!--a", &danger)?,
        "<p>foo &lt;!--a</p>",
        "should not support eof in a comment (2)"
    );

    assert_eq!(
        to_html_with_options("foo <!--a-", &danger)?,
        "<p>foo &lt;!--a-</p>",
        "should not support eof in a comment (3)"
    );

    assert_eq!(
        to_html_with_options("foo <!--a--", &danger)?,
        "<p>foo &lt;!--a--</p>",
        "should not support eof in a comment (4)"
    );

    // Note: cmjs parses this differently.
    // See: <https://github.com/commonmark/commonmark.js/issues/193>
    assert_eq!(
        to_html_with_options("foo <![cdata[]]>", &danger)?,
        "<p>foo &lt;![cdata[]]&gt;</p>",
        "should not support lowercase “cdata”"
    );

    assert_eq!(
        to_html_with_options("foo <![CDATA", &danger)?,
        "<p>foo &lt;![CDATA</p>",
        "should not support eof in a CDATA (1)"
    );

    assert_eq!(
        to_html_with_options("foo <![CDATA[", &danger)?,
        "<p>foo &lt;![CDATA[</p>",
        "should not support eof in a CDATA (2)"
    );

    assert_eq!(
        to_html_with_options("foo <![CDATA[]", &danger)?,
        "<p>foo &lt;![CDATA[]</p>",
        "should not support eof in a CDATA (3)"
    );

    assert_eq!(
        to_html_with_options("foo <![CDATA[]]", &danger)?,
        "<p>foo &lt;![CDATA[]]</p>",
        "should not support eof in a CDATA (4)"
    );

    assert_eq!(
        to_html_with_options("foo <![CDATA[asd", &danger)?,
        "<p>foo &lt;![CDATA[asd</p>",
        "should not support eof in a CDATA (5)"
    );

    assert_eq!(
        to_html_with_options("foo <![CDATA[]]]]>", &danger)?,
        "<p>foo <![CDATA[]]]]></p>",
        "should support end-like constructs in CDATA"
    );

    assert_eq!(
        to_html_with_options("foo <!doctype", &danger)?,
        "<p>foo &lt;!doctype</p>",
        "should not support eof in declarations"
    );

    assert_eq!(
        to_html_with_options("foo <?php", &danger)?,
        "<p>foo &lt;?php</p>",
        "should not support eof in instructions (1)"
    );

    assert_eq!(
        to_html_with_options("foo <?php?", &danger)?,
        "<p>foo &lt;?php?</p>",
        "should not support eof in instructions (2)"
    );

    assert_eq!(
        to_html_with_options("foo <???>", &danger)?,
        "<p>foo <???></p>",
        "should support question marks in instructions"
    );

    assert_eq!(
        to_html_with_options("foo </3>", &danger)?,
        "<p>foo &lt;/3&gt;</p>",
        "should not support closing tags that don’t start w/ alphas"
    );

    assert_eq!(
        to_html_with_options("foo </a->", &danger)?,
        "<p>foo </a-></p>",
        "should support dashes in closing tags"
    );

    assert_eq!(
        to_html_with_options("foo </a >", &danger)?,
        "<p>foo </a ></p>",
        "should support whitespace after closing tag names"
    );

    assert_eq!(
        to_html_with_options("foo </a!>", &danger)?,
        "<p>foo &lt;/a!&gt;</p>",
        "should not support other characters after closing tag names"
    );

    assert_eq!(
        to_html_with_options("foo <a->", &danger)?,
        "<p>foo <a-></p>",
        "should support dashes in opening tags"
    );

    assert_eq!(
        to_html_with_options("foo <a >", &danger)?,
        "<p>foo <a ></p>",
        "should support whitespace after opening tag names"
    );

    assert_eq!(
        to_html_with_options("foo <a!>", &danger)?,
        "<p>foo &lt;a!&gt;</p>",
        "should not support other characters after opening tag names"
    );

    assert_eq!(
        to_html_with_options("foo <a !>", &danger)?,
        "<p>foo &lt;a !&gt;</p>",
        "should not support other characters in opening tags (1)"
    );

    assert_eq!(
        to_html_with_options("foo <a b!>", &danger)?,
        "<p>foo &lt;a b!&gt;</p>",
        "should not support other characters in opening tags (2)"
    );

    assert_eq!(
        to_html_with_options("foo <a b/>", &danger)?,
        "<p>foo <a b/></p>",
        "should support a self-closing slash after an attribute name"
    );

    assert_eq!(
        to_html_with_options("foo <a b>", &danger)?,
        "<p>foo <a b></p>",
        "should support a greater than after an attribute name"
    );

    assert_eq!(
        to_html_with_options("foo <a b=<>", &danger)?,
        "<p>foo &lt;a b=&lt;&gt;</p>",
        "should not support less than to start an unquoted attribute value"
    );

    assert_eq!(
        to_html_with_options("foo <a b=>>", &danger)?,
        "<p>foo &lt;a b=&gt;&gt;</p>",
        "should not support greater than to start an unquoted attribute value"
    );

    assert_eq!(
        to_html_with_options("foo <a b==>", &danger)?,
        "<p>foo &lt;a b==&gt;</p>",
        "should not support equals to to start an unquoted attribute value"
    );

    assert_eq!(
        to_html_with_options("foo <a b=`>", &danger)?,
        "<p>foo &lt;a b=`&gt;</p>",
        "should not support grave accent to start an unquoted attribute value"
    );

    assert_eq!(
        to_html_with_options("foo <a b=\"asd", &danger)?,
        "<p>foo &lt;a b=&quot;asd</p>",
        "should not support eof in double quoted attribute value"
    );

    assert_eq!(
        to_html_with_options("foo <a b='asd", &danger)?,
        "<p>foo &lt;a b='asd</p>",
        "should not support eof in single quoted attribute value"
    );

    assert_eq!(
        to_html_with_options("foo <a b=asd", &danger)?,
        "<p>foo &lt;a b=asd</p>",
        "should not support eof in unquoted attribute value"
    );

    assert_eq!(
        to_html_with_options("foo <a b=\nasd>", &danger)?,
        "<p>foo <a b=\nasd></p>",
        "should support an eol before an attribute value"
    );

    assert_eq!(
        to_html_with_options("<x> a", &danger)?,
        "<p><x> a</p>",
        "should support starting a line w/ a tag if followed by anything other than an eol (after optional space/tabs)"
    );

    // The tag never closes, so the whole snippet is rendered as text.
    assert_eq!(
        to_html_with_options("<span foo=", &danger)?,
        "<p>&lt;span foo=</p>",
        "should support an EOF before an attribute value"
    );

    assert_eq!(
        to_html_with_options("a <!b\nc>", &danger)?,
        "<p>a <!b\nc></p>",
        "should support an EOL in a declaration"
    );

    assert_eq!(
        to_html_with_options("a <![CDATA[\n]]>", &danger)?,
        "<p>a <![CDATA[\n]]></p>",
        "should support an EOL in cdata"
    );

    // Note: cmjs parses this differently.
    // See: <https://github.com/commonmark/commonmark.js/issues/196>
    assert_eq!(
        to_html_with_options("a <?\n?>", &danger)?,
        "<p>a <?\n?></p>",
        "should support an EOL in an instruction"
    );

    // With the `html_text` construct disabled, `<x>` is ordinary text and is
    // encoded like any other `<`/`>`.
    assert_eq!(
        to_html_with_options(
            "a <x>",
            &Options {
                parse: ParseOptions {
                    constructs: Constructs {
                        html_text: false,
                        ..Default::default()
                    },
                    ..Default::default()
                },
                ..Default::default()
            }
        )?,
        "<p>a &lt;x&gt;</p>",
        "should support turning off html (text)"
    );

    // In the syntax tree each inline tag becomes its own `Html` node, placed
    // between the surrounding `Text` nodes.
    assert_eq!(
        to_mdast("alpha <i>bravo</b> charlie.", &Default::default())?,
        Node::Root(Root {
            children: vec![Node::Paragraph(Paragraph {
                children: vec![
                    Node::Text(Text {
                        value: "alpha ".into(),
                        position: Some(Position::new(1, 1, 0, 1, 7, 6))
                    }),
                    Node::Html(Html {
                        value: "<i>".into(),
                        position: Some(Position::new(1, 7, 6, 1, 10, 9))
                    }),
                    Node::Text(Text {
                        value: "bravo".into(),
                        position: Some(Position::new(1, 10, 9, 1, 15, 14))
                    }),
                    Node::Html(Html {
                        value: "</b>".into(),
                        position: Some(Position::new(1, 15, 14, 1, 19, 18))
                    }),
                    Node::Text(Text {
                        value: " charlie.".into(),
                        position: Some(Position::new(1, 19, 18, 1, 28, 27))
                    })
                ],
                position: Some(Position::new(1, 1, 0, 1, 28, 27))
            })],
            position: Some(Position::new(1, 1, 0, 1, 28, 27))
        }),
        "should support HTML (text) as `Html`s in mdast"
    );

    Ok(())
}