Markdown parser fork with extended syntax for personal use.
at main 233 lines 7.1 kB view raw
1use markdown::{ 2 mdast::{Node, Paragraph, Root, Text}, 3 message, to_html, to_html_with_options, to_mdast, 4 unist::Position, 5 CompileOptions, Constructs, Options, ParseOptions, 6}; 7use pretty_assertions::assert_eq; 8 9#[test] 10fn character_reference() -> Result<(), message::Message> { 11 assert_eq!( 12 to_html( 13 "&nbsp; &amp; &copy; &AElig; &Dcaron;\n&frac34; &HilbertSpace; &DifferentialD;\n&ClockwiseContourIntegral; &ngE;" 14 ), 15 "<p>\u{a0} &amp; © Æ Ď\n¾ ℋ ⅆ\n∲ ≧̸</p>", 16 "should support named character references" 17 ); 18 19 assert_eq!( 20 to_html("&#35; &#1234; &#992; &#0;"), 21 "<p># Ӓ Ϡ �</p>", 22 "should support decimal character references" 23 ); 24 25 assert_eq!( 26 to_html("&#X22; &#XD06; &#xcab;"), 27 "<p>&quot; ആ ಫ</p>", 28 "should support hexadecimal character references" 29 ); 30 31 assert_eq!( 32 to_html( 33 "&nbsp &x; &#; &#x;\n&#987654321;\n&#abcdef0;\n&ThisIsNotDefined; &hi?;"), 34 "<p>&amp;nbsp &amp;x; &amp;#; &amp;#x;\n&amp;#987654321;\n&amp;#abcdef0;\n&amp;ThisIsNotDefined; &amp;hi?;</p>", 35 "should not support other things that look like character references" 36 ); 37 38 assert_eq!( 39 to_html("&copy"), 40 "<p>&amp;copy</p>", 41 "should not support character references w/o semicolon" 42 ); 43 44 assert_eq!( 45 to_html("&MadeUpEntity;"), 46 "<p>&amp;MadeUpEntity;</p>", 47 "should not support unknown named character references" 48 ); 49 50 assert_eq!( 51 to_html_with_options( 52 "<a href=\"&ouml;&ouml;.html\">", 53 &Options { 54 compile: CompileOptions { 55 allow_dangerous_html: true, 56 allow_dangerous_protocol: true, 57 ..Default::default() 58 }, 59 ..Default::default() 60 } 61 )?, 62 "<a href=\"&ouml;&ouml;.html\">", 63 "should not care about character references in html" 64 ); 65 66 assert_eq!( 67 to_html("[foo](/f&ouml;&ouml; \"f&ouml;&ouml;\")"), 68 "<p><a href=\"/f%C3%B6%C3%B6\" title=\"föö\">foo</a></p>", 69 "should support character references in resource URLs and titles" 70 ); 71 72 assert_eq!( 73 to_html("[foo]: /f&ouml;&ouml; \"f&ouml;&ouml;\"\n\n[foo]"), 74 "<p><a href=\"/f%C3%B6%C3%B6\" title=\"föö\">foo</a></p>", 75 "should support character references in definition URLs and titles" 76 ); 77 78 assert_eq!( 79 to_html("``` f&ouml;&ouml;\nfoo\n```"), 80 "<pre><code class=\"language-föö\">foo\n</code></pre>", 81 "should support character references in code language" 82 ); 83 84 assert_eq!( 85 to_html("`f&ouml;&ouml;`"), 86 "<p><code>f&amp;ouml;&amp;ouml;</code></p>", 87 "should not support character references in text code" 88 ); 89 90 assert_eq!( 91 to_html(" f&ouml;f&ouml;"), 92 "<pre><code>f&amp;ouml;f&amp;ouml;\n</code></pre>", 93 "should not support character references in indented code" 94 ); 95 96 assert_eq!( 97 to_html("&#42;foo&#42;\n*foo*"), 98 "<p>*foo*\n<em>foo</em></p>", 99 "should not support character references as construct markers (1)" 100 ); 101 102 assert_eq!( 103 to_html("&#42; foo\n\n* foo"), 104 "<p>* foo</p>\n<ul>\n<li>foo</li>\n</ul>", 105 "should not support character references as construct markers (2)" 106 ); 107 108 assert_eq!( 109 to_html("[a](url &quot;tit&quot;)"), 110 "<p>[a](url &quot;tit&quot;)</p>", 111 "should not support character references as construct markers (3)" 112 ); 113 114 assert_eq!( 115 to_html("foo&#10;&#10;bar"), 116 "<p>foo\n\nbar</p>", 117 "should not support character references as whitespace (1)" 118 ); 119 120 assert_eq!( 121 to_html("&#9;foo"), 122 "<p>\tfoo</p>", 123 "should not support character references as whitespace (2)" 124 ); 125 126 // Extra: 127 assert_eq!( 128 to_html("&CounterClockwiseContourIntegral;"), 129 "<p>∳</p>", 130 "should support the longest possible named character reference" 131 ); 132 133 assert_eq!( 134 to_html("&#xff9999;"), 135 "<p>�</p>", 136 "should “support” a longest possible hexadecimal character reference" 137 ); 138 139 assert_eq!( 140 to_html("&#9999999;"), 141 "<p>�</p>", 142 "should “support” a longest possible decimal character reference" 143 ); 144 145 assert_eq!( 146 to_html("&CounterClockwiseContourIntegrali;"), 147 "<p>&amp;CounterClockwiseContourIntegrali;</p>", 148 "should not support the longest possible named character reference" 149 ); 150 151 assert_eq!( 152 to_html("&#xff99999;"), 153 "<p>&amp;#xff99999;</p>", 154 "should not support a longest possible hexadecimal character reference" 155 ); 156 157 assert_eq!( 158 to_html("&#99999999;"), 159 "<p>&amp;#99999999;</p>", 160 "should not support a longest possible decimal character reference" 161 ); 162 163 assert_eq!( 164 to_html("&-;"), 165 "<p>&amp;-;</p>", 166 "should not support the other characters after `&`" 167 ); 168 169 assert_eq!( 170 to_html("&#-;"), 171 "<p>&amp;#-;</p>", 172 "should not support the other characters after `#`" 173 ); 174 175 assert_eq!( 176 to_html("&#x-;"), 177 "<p>&amp;#x-;</p>", 178 "should not support the other characters after `#x`" 179 ); 180 181 assert_eq!( 182 to_html("&lt-;"), 183 "<p>&amp;lt-;</p>", 184 "should not support the other characters inside a name" 185 ); 186 187 assert_eq!( 188 to_html("&#9-;"), 189 "<p>&amp;#9-;</p>", 190 "should not support the other characters inside a demical" 191 ); 192 193 assert_eq!( 194 to_html("&#x9-;"), 195 "<p>&amp;#x9-;</p>", 196 "should not support the other characters inside a hexademical" 197 ); 198 199 assert_eq!( 200 to_html_with_options( 201 "&amp;", 202 &Options { 203 parse: ParseOptions { 204 constructs: Constructs { 205 character_reference: false, 206 ..Default::default() 207 }, 208 ..Default::default() 209 }, 210 ..Default::default() 211 } 212 )?, 213 "<p>&amp;amp;</p>", 214 "should support turning off character references" 215 ); 216 217 assert_eq!( 218 to_mdast("&nbsp; &amp; &copy; &AElig; &Dcaron;\n&frac34; &HilbertSpace; &DifferentialD;\n&ClockwiseContourIntegral; &ngE;\n&#35; &#1234; &#992; &#0;\n&#X22; &#XD06; &#xcab;", &Default::default())?, 219 Node::Root(Root { 220 children: vec![Node::Paragraph(Paragraph { 221 children: vec![Node::Text(Text { 222 value: "\u{a0} & © Æ Ď\n¾ ℋ ⅆ\n∲ ≧̸\n# Ӓ Ϡ �\n\" ആ ಫ".into(), 223 position: Some(Position::new(1, 1, 0, 5, 23, 158)) 224 }),], 225 position: Some(Position::new(1, 1, 0, 5, 23, 158)) 226 })], 227 position: Some(Position::new(1, 1, 0, 5, 23, 158)) 228 }), 229 "should support character references as `Text`s in mdast" 230 ); 231 232 Ok(()) 233}