Markdown parser fork with extended syntax for personal use.
at main 481 lines 14 kB view raw
1use markdown::{ 2 mdast::{Html, Node, Paragraph, Root, Text}, 3 message, to_html, to_html_with_options, to_mdast, 4 unist::Position, 5 CompileOptions, Constructs, Options, ParseOptions, 6}; 7use pretty_assertions::assert_eq; 8 9#[test] 10fn html_text() -> Result<(), message::Message> { 11 let danger = Options { 12 compile: CompileOptions { 13 allow_dangerous_html: true, 14 allow_dangerous_protocol: true, 15 ..Default::default() 16 }, 17 ..Default::default() 18 }; 19 20 assert_eq!( 21 to_html("a <b> c"), 22 "<p>a &lt;b&gt; c</p>", 23 "should encode dangerous html by default" 24 ); 25 26 assert_eq!( 27 to_html_with_options("<a><bab><c2c>", &danger)?, 28 "<p><a><bab><c2c></p>", 29 "should support opening tags" 30 ); 31 32 assert_eq!( 33 to_html_with_options("<a/><b2/>", &danger)?, 34 "<p><a/><b2/></p>", 35 "should support self-closing tags" 36 ); 37 38 assert_eq!( 39 to_html_with_options("<a /><b2\ndata=\"foo\" >", &danger)?, 40 "<p><a /><b2\ndata=\"foo\" ></p>", 41 "should support whitespace in tags" 42 ); 43 44 assert_eq!( 45 to_html_with_options( 46 "<a foo=\"bar\" bam = 'baz <em>\"</em>'\n_boolean zoop:33=zoop:33 />", 47 &danger 48 )?, 49 "<p><a foo=\"bar\" bam = 'baz <em>\"</em>'\n_boolean zoop:33=zoop:33 /></p>", 50 "should support attributes on tags" 51 ); 52 53 assert_eq!( 54 to_html_with_options("Foo <responsive-image src=\"foo.jpg\" />", &danger)?, 55 "<p>Foo <responsive-image src=\"foo.jpg\" /></p>", 56 "should support non-html tags" 57 ); 58 59 assert_eq!( 60 to_html_with_options("<33> <__>", &danger)?, 61 "<p>&lt;33&gt; &lt;__&gt;</p>", 62 "should not support nonconforming tag names" 63 ); 64 65 assert_eq!( 66 to_html_with_options("<a h*#ref=\"hi\">", &danger)?, 67 "<p>&lt;a h*#ref=&quot;hi&quot;&gt;</p>", 68 "should not support nonconforming attribute names" 69 ); 70 71 assert_eq!( 72 to_html_with_options("<a href=\"hi'> <a href=hi'>", &danger)?, 73 "<p>&lt;a href=&quot;hi'&gt; &lt;a href=hi'&gt;</p>", 74 "should not support nonconforming attribute values" 75 ); 76 77 assert_eq!( 78 to_html_with_options("< a><\nfoo><bar/ >\n<foo bar=baz\nbim!bop />", &danger)?, 79 "<p>&lt; a&gt;&lt;\nfoo&gt;&lt;bar/ &gt;\n&lt;foo bar=baz\nbim!bop /&gt;</p>", 80 "should not support nonconforming whitespace" 81 ); 82 83 assert_eq!( 84 to_html_with_options("<a href='bar'title=title>", &danger)?, 85 "<p>&lt;a href='bar'title=title&gt;</p>", 86 "should not support missing whitespace" 87 ); 88 89 assert_eq!( 90 to_html_with_options("</a></foo >", &danger)?, 91 "<p></a></foo ></p>", 92 "should support closing tags" 93 ); 94 95 assert_eq!( 96 to_html_with_options("</a href=\"foo\">", &danger)?, 97 "<p>&lt;/a href=&quot;foo&quot;&gt;</p>", 98 "should not support closing tags w/ attributes" 99 ); 100 101 assert_eq!( 102 to_html_with_options("foo <!-- this is a\ncomment - with hyphen -->", &danger)?, 103 "<p>foo <!-- this is a\ncomment - with hyphen --></p>", 104 "should support comments" 105 ); 106 107 assert_eq!( 108 to_html_with_options("foo <!-- not a comment -- two hyphens -->", &danger)?, 109 "<p>foo <!-- not a comment -- two hyphens --></p>", 110 "should support comments w/ two dashes inside" 111 ); 112 113 assert_eq!( 114 to_html_with_options("foo <!--> foo -->", &danger)?, 115 "<p>foo <!--> foo --&gt;</p>", 116 "should support nonconforming comments (1)" 117 ); 118 119 assert_eq!( 120 to_html_with_options("foo <!-- foo--->", &danger)?, 121 "<p>foo <!-- foo---></p>", 122 "should support nonconforming comments (2)" 123 ); 124 125 assert_eq!( 126 to_html_with_options("foo <?php echo $a; ?>", &danger)?, 127 "<p>foo <?php echo $a; ?></p>", 128 "should support instructions" 129 ); 130 131 assert_eq!( 132 to_html_with_options("foo <!ELEMENT br EMPTY>", &danger)?, 133 "<p>foo <!ELEMENT br EMPTY></p>", 134 "should support declarations" 135 ); 136 137 assert_eq!( 138 to_html_with_options("foo <![CDATA[>&<]]>", &danger)?, 139 "<p>foo <![CDATA[>&<]]></p>", 140 "should support cdata" 141 ); 142 143 assert_eq!( 144 to_html_with_options("foo <a href=\"&ouml;\">", &danger)?, 145 "<p>foo <a href=\"&ouml;\"></p>", 146 "should support (ignore) character references" 147 ); 148 149 assert_eq!( 150 to_html_with_options("foo <a href=\"\\*\">", &danger)?, 151 "<p>foo <a href=\"\\*\"></p>", 152 "should not support character escapes (1)" 153 ); 154 155 assert_eq!( 156 to_html_with_options("<a href=\"\\\"\">", &danger)?, 157 "<p>&lt;a href=&quot;&quot;&quot;&gt;</p>", 158 "should not support character escapes (2)" 159 ); 160 161 // Extra: 162 assert_eq!( 163 to_html_with_options("foo <!1>", &danger)?, 164 "<p>foo &lt;!1&gt;</p>", 165 "should not support non-comment, non-cdata, and non-named declaration" 166 ); 167 168 assert_eq!( 169 to_html_with_options("foo <!-not enough!-->", &danger)?, 170 "<p>foo &lt;!-not enough!--&gt;</p>", 171 "should not support comments w/ not enough dashes" 172 ); 173 174 assert_eq!( 175 to_html_with_options("foo <!---ok-->", &danger)?, 176 "<p>foo <!---ok--></p>", 177 "should support comments that start w/ a dash, if it’s not followed by a greater than" 178 ); 179 180 assert_eq!( 181 to_html_with_options("foo <!--->", &danger)?, 182 "<p>foo <!---></p>", 183 "should support comments that start w/ `->`" 184 ); 185 186 assert_eq!( 187 to_html_with_options("foo <!-- -> -->", &danger)?, 188 "<p>foo <!-- -> --></p>", 189 "should support `->` in a comment" 190 ); 191 192 assert_eq!( 193 to_html_with_options("foo <!--", &danger)?, 194 "<p>foo &lt;!--</p>", 195 "should not support eof in a comment (1)" 196 ); 197 198 assert_eq!( 199 to_html_with_options("foo <!--a", &danger)?, 200 "<p>foo &lt;!--a</p>", 201 "should not support eof in a comment (2)" 202 ); 203 204 assert_eq!( 205 to_html_with_options("foo <!--a-", &danger)?, 206 "<p>foo &lt;!--a-</p>", 207 "should not support eof in a comment (3)" 208 ); 209 210 assert_eq!( 211 to_html_with_options("foo <!--a--", &danger)?, 212 "<p>foo &lt;!--a--</p>", 213 "should not support eof in a comment (4)" 214 ); 215 216 // Note: cmjs parses this differently. 217 // See: <https://github.com/commonmark/commonmark.js/issues/193> 218 assert_eq!( 219 to_html_with_options("foo <![cdata[]]>", &danger)?, 220 "<p>foo &lt;![cdata[]]&gt;</p>", 221 "should not support lowercase “cdata”" 222 ); 223 224 assert_eq!( 225 to_html_with_options("foo <![CDATA", &danger)?, 226 "<p>foo &lt;![CDATA</p>", 227 "should not support eof in a CDATA (1)" 228 ); 229 230 assert_eq!( 231 to_html_with_options("foo <![CDATA[", &danger)?, 232 "<p>foo &lt;![CDATA[</p>", 233 "should not support eof in a CDATA (2)" 234 ); 235 236 assert_eq!( 237 to_html_with_options("foo <![CDATA[]", &danger)?, 238 "<p>foo &lt;![CDATA[]</p>", 239 "should not support eof in a CDATA (3)" 240 ); 241 242 assert_eq!( 243 to_html_with_options("foo <![CDATA[]]", &danger)?, 244 "<p>foo &lt;![CDATA[]]</p>", 245 "should not support eof in a CDATA (4)" 246 ); 247 248 assert_eq!( 249 to_html_with_options("foo <![CDATA[asd", &danger)?, 250 "<p>foo &lt;![CDATA[asd</p>", 251 "should not support eof in a CDATA (5)" 252 ); 253 254 assert_eq!( 255 to_html_with_options("foo <![CDATA[]]]]>", &danger)?, 256 "<p>foo <![CDATA[]]]]></p>", 257 "should support end-like constructs in CDATA" 258 ); 259 260 assert_eq!( 261 to_html_with_options("foo <!doctype", &danger)?, 262 "<p>foo &lt;!doctype</p>", 263 "should not support eof in declarations" 264 ); 265 266 assert_eq!( 267 to_html_with_options("foo <?php", &danger)?, 268 "<p>foo &lt;?php</p>", 269 "should not support eof in instructions (1)" 270 ); 271 272 assert_eq!( 273 to_html_with_options("foo <?php?", &danger)?, 274 "<p>foo &lt;?php?</p>", 275 "should not support eof in instructions (2)" 276 ); 277 278 assert_eq!( 279 to_html_with_options("foo <???>", &danger)?, 280 "<p>foo <???></p>", 281 "should support question marks in instructions" 282 ); 283 284 assert_eq!( 285 to_html_with_options("foo </3>", &danger)?, 286 "<p>foo &lt;/3&gt;</p>", 287 "should not support closing tags that don’t start w/ alphas" 288 ); 289 290 assert_eq!( 291 to_html_with_options("foo </a->", &danger)?, 292 "<p>foo </a-></p>", 293 "should support dashes in closing tags" 294 ); 295 296 assert_eq!( 297 to_html_with_options("foo </a >", &danger)?, 298 "<p>foo </a ></p>", 299 "should support whitespace after closing tag names" 300 ); 301 302 assert_eq!( 303 to_html_with_options("foo </a!>", &danger)?, 304 "<p>foo &lt;/a!&gt;</p>", 305 "should not support other characters after closing tag names" 306 ); 307 308 assert_eq!( 309 to_html_with_options("foo <a->", &danger)?, 310 "<p>foo <a-></p>", 311 "should support dashes in opening tags" 312 ); 313 314 assert_eq!( 315 to_html_with_options("foo <a >", &danger)?, 316 "<p>foo <a ></p>", 317 "should support whitespace after opening tag names" 318 ); 319 320 assert_eq!( 321 to_html_with_options("foo <a!>", &danger)?, 322 "<p>foo &lt;a!&gt;</p>", 323 "should not support other characters after opening tag names" 324 ); 325 326 assert_eq!( 327 to_html_with_options("foo <a !>", &danger)?, 328 "<p>foo &lt;a !&gt;</p>", 329 "should not support other characters in opening tags (1)" 330 ); 331 332 assert_eq!( 333 to_html_with_options("foo <a b!>", &danger)?, 334 "<p>foo &lt;a b!&gt;</p>", 335 "should not support other characters in opening tags (2)" 336 ); 337 338 assert_eq!( 339 to_html_with_options("foo <a b/>", &danger)?, 340 "<p>foo <a b/></p>", 341 "should support a self-closing slash after an attribute name" 342 ); 343 344 assert_eq!( 345 to_html_with_options("foo <a b>", &danger)?, 346 "<p>foo <a b></p>", 347 "should support a greater than after an attribute name" 348 ); 349 350 assert_eq!( 351 to_html_with_options("foo <a b=<>", &danger)?, 352 "<p>foo &lt;a b=&lt;&gt;</p>", 353 "should not support less than to start an unquoted attribute value" 354 ); 355 356 assert_eq!( 357 to_html_with_options("foo <a b=>>", &danger)?, 358 "<p>foo &lt;a b=&gt;&gt;</p>", 359 "should not support greater than to start an unquoted attribute value" 360 ); 361 362 assert_eq!( 363 to_html_with_options("foo <a b==>", &danger)?, 364 "<p>foo &lt;a b==&gt;</p>", 365 "should not support equals to to start an unquoted attribute value" 366 ); 367 368 assert_eq!( 369 to_html_with_options("foo <a b=`>", &danger)?, 370 "<p>foo &lt;a b=`&gt;</p>", 371 "should not support grave accent to start an unquoted attribute value" 372 ); 373 374 assert_eq!( 375 to_html_with_options("foo <a b=\"asd", &danger)?, 376 "<p>foo &lt;a b=&quot;asd</p>", 377 "should not support eof in double quoted attribute value" 378 ); 379 380 assert_eq!( 381 to_html_with_options("foo <a b='asd", &danger)?, 382 "<p>foo &lt;a b='asd</p>", 383 "should not support eof in single quoted attribute value" 384 ); 385 386 assert_eq!( 387 to_html_with_options("foo <a b=asd", &danger)?, 388 "<p>foo &lt;a b=asd</p>", 389 "should not support eof in unquoted attribute value" 390 ); 391 392 assert_eq!( 393 to_html_with_options("foo <a b=\nasd>", &danger)?, 394 "<p>foo <a b=\nasd></p>", 395 "should support an eol before an attribute value" 396 ); 397 398 assert_eq!( 399to_html_with_options("<x> a", &danger)?, 400"<p><x> a</p>", 401"should support starting a line w/ a tag if followed by anything other than an eol (after optional space/tabs)" 402); 403 404 assert_eq!( 405 to_html_with_options("<span foo=", &danger)?, 406 "<p>&lt;span foo=</p>", 407 "should support an EOF before an attribute value" 408 ); 409 410 assert_eq!( 411 to_html_with_options("a <!b\nc>", &danger)?, 412 "<p>a <!b\nc></p>", 413 "should support an EOL in a declaration" 414 ); 415 assert_eq!( 416 to_html_with_options("a <![CDATA[\n]]>", &danger)?, 417 "<p>a <![CDATA[\n]]></p>", 418 "should support an EOL in cdata" 419 ); 420 421 // Note: cmjs parses this differently. 422 // See: <https://github.com/commonmark/commonmark.js/issues/196> 423 assert_eq!( 424 to_html_with_options("a <?\n?>", &danger)?, 425 "<p>a <?\n?></p>", 426 "should support an EOL in an instruction" 427 ); 428 429 assert_eq!( 430 to_html_with_options( 431 "a <x>", 432 &Options { 433 parse: ParseOptions { 434 constructs: Constructs { 435 html_text: false, 436 ..Default::default() 437 }, 438 ..Default::default() 439 }, 440 ..Default::default() 441 } 442 )?, 443 "<p>a &lt;x&gt;</p>", 444 "should support turning off html (text)" 445 ); 446 447 assert_eq!( 448 to_mdast("alpha <i>bravo</b> charlie.", &Default::default())?, 449 Node::Root(Root { 450 children: vec![Node::Paragraph(Paragraph { 451 children: vec![ 452 Node::Text(Text { 453 value: "alpha ".into(), 454 position: Some(Position::new(1, 1, 0, 1, 7, 6)) 455 }), 456 Node::Html(Html { 457 value: "<i>".into(), 458 position: Some(Position::new(1, 7, 6, 1, 10, 9)) 459 }), 460 Node::Text(Text { 461 value: "bravo".into(), 462 position: Some(Position::new(1, 10, 9, 1, 15, 14)) 463 }), 464 Node::Html(Html { 465 value: "</b>".into(), 466 position: Some(Position::new(1, 15, 14, 1, 19, 18)) 467 }), 468 Node::Text(Text { 469 value: " charlie.".into(), 470 position: Some(Position::new(1, 19, 18, 1, 28, 27)) 471 }) 472 ], 473 position: Some(Position::new(1, 1, 0, 1, 28, 27)) 474 })], 475 position: Some(Position::new(1, 1, 0, 1, 28, 27)) 476 }), 477 "should support HTML (text) as `Html`s in mdast" 478 ); 479 480 Ok(()) 481}