Markdown parser fork with extended syntax for personal use.
1use markdown::{
2 mdast::{Node, Paragraph, Root, Text},
3 message, to_html, to_html_with_options, to_mdast,
4 unist::Position,
5 CompileOptions, Constructs, Options, ParseOptions,
6};
7use pretty_assertions::assert_eq;
8
/// Checks how character references (`&name;`, `&#123;`, `&#x1f;`) are handled
/// by `to_html`, `to_html_with_options` and `to_mdast`.
///
/// Every input literal spells its references out in source form (for example
/// `&ouml;` rather than `ö`): the parser must see the reference itself, not the
/// character it stands for. Expected HTML uses `&amp;` / `&quot;` wherever the
/// compiler escapes a literal `&` or `"`.
///
/// # Errors
///
/// Propagates the `message::Message` returned by `to_html_with_options` or
/// `to_mdast` via `?`.
#[test]
fn character_reference() -> Result<(), message::Message> {
    // Valid references: named, decimal, hexadecimal.
    assert_eq!(
        to_html(
            "&nbsp; &amp; &copy; &AElig; &Dcaron;\n&frac34; &HilbertSpace; &DifferentialD;\n&ClockwiseContourIntegral; &ngE;"
        ),
        "<p>\u{a0} &amp; © Æ Ď\n¾ ℋ ⅆ\n∲ ≧̸</p>",
        "should support named character references"
    );

    assert_eq!(
        to_html("&#35; &#1234; &#992; &#0;"),
        "<p># Ӓ Ϡ �</p>",
        "should support decimal character references"
    );

    assert_eq!(
        to_html("&#X22; &#XD06; &#xcab;"),
        "<p>&quot; ആ ಫ</p>",
        "should support hexadecimal character references"
    );

    // Look-alikes that must stay literal text (their `&` is escaped on output).
    assert_eq!(
        to_html(
            "&nbsp &x; &#; &#x;\n&#987654321;\n&#abcdef0;\n&ThisIsNotDefined; &hi?;"
        ),
        "<p>&amp;nbsp &amp;x; &amp;#; &amp;#x;\n&amp;#987654321;\n&amp;#abcdef0;\n&amp;ThisIsNotDefined; &amp;hi?;</p>",
        "should not support other things that look like character references"
    );

    assert_eq!(
        to_html("&copy"),
        "<p>&amp;copy</p>",
        "should not support character references w/o semicolon"
    );

    assert_eq!(
        to_html("&MadeUpEntity;"),
        "<p>&amp;MadeUpEntity;</p>",
        "should not support unknown named character references"
    );

    // Raw HTML is passed through untouched, references included.
    assert_eq!(
        to_html_with_options(
            "<a href=\"&ouml;&ouml;.html\">",
            &Options {
                compile: CompileOptions {
                    allow_dangerous_html: true,
                    allow_dangerous_protocol: true,
                    ..Default::default()
                },
                ..Default::default()
            }
        )?,
        "<a href=\"&ouml;&ouml;.html\">",
        "should not care about character references in html"
    );

    // Places where references are decoded.
    assert_eq!(
        to_html("[foo](/f&ouml;&ouml; \"f&ouml;&ouml;\")"),
        "<p><a href=\"/f%C3%B6%C3%B6\" title=\"föö\">foo</a></p>",
        "should support character references in resource URLs and titles"
    );

    assert_eq!(
        to_html("[foo]: /f&ouml;&ouml; \"f&ouml;&ouml;\"\n\n[foo]"),
        "<p><a href=\"/f%C3%B6%C3%B6\" title=\"föö\">foo</a></p>",
        "should support character references in definition URLs and titles"
    );

    assert_eq!(
        to_html("``` f&ouml;&ouml;\nfoo\n```"),
        "<pre><code class=\"language-föö\">foo\n</code></pre>",
        "should support character references in code language"
    );

    // Places where references are NOT decoded.
    assert_eq!(
        to_html("`f&ouml;&ouml;`"),
        "<p><code>f&amp;ouml;&amp;ouml;</code></p>",
        "should not support character references in text code"
    );

    // Four leading spaces are required for this to be indented code.
    assert_eq!(
        to_html("    f&ouml;f&ouml;"),
        "<pre><code>f&amp;ouml;f&amp;ouml;\n</code></pre>",
        "should not support character references in indented code"
    );

    // A decoded character never acts as markdown syntax or whitespace.
    assert_eq!(
        to_html("&#42;foo&#42;\n*foo*"),
        "<p>*foo*\n<em>foo</em></p>",
        "should not support character references as construct markers (1)"
    );

    assert_eq!(
        to_html("&#42; foo\n\n* foo"),
        "<p>* foo</p>\n<ul>\n<li>foo</li>\n</ul>",
        "should not support character references as construct markers (2)"
    );

    assert_eq!(
        to_html("[a](url &quot;tit&quot;)"),
        "<p>[a](url &quot;tit&quot;)</p>",
        "should not support character references as construct markers (3)"
    );

    assert_eq!(
        to_html("foo&#10;&#10;bar"),
        "<p>foo\n\nbar</p>",
        "should not support character references as whitespace (1)"
    );

    assert_eq!(
        to_html("&#9;foo"),
        "<p>\tfoo</p>",
        "should not support character references as whitespace (2)"
    );

    // Extra:
    // Length limits: the longest accepted form, then one character too many.
    assert_eq!(
        to_html("&CounterClockwiseContourIntegral;"),
        "<p>∳</p>",
        "should support the longest possible named character reference"
    );

    assert_eq!(
        to_html("&#xff9999;"),
        "<p>�</p>",
        "should “support” a longest possible hexadecimal character reference"
    );

    assert_eq!(
        to_html("&#9999999;"),
        "<p>�</p>",
        "should “support” a longest possible decimal character reference"
    );

    assert_eq!(
        to_html("&CounterClockwiseContourIntegrali;"),
        "<p>&amp;CounterClockwiseContourIntegrali;</p>",
        "should not support the longest possible named character reference"
    );

    assert_eq!(
        to_html("&#xff99999;"),
        "<p>&amp;#xff99999;</p>",
        "should not support a longest possible hexadecimal character reference"
    );

    assert_eq!(
        to_html("&#99999999;"),
        "<p>&amp;#99999999;</p>",
        "should not support a longest possible decimal character reference"
    );

    // Invalid characters right after the prefix or inside the body.
    assert_eq!(
        to_html("&-;"),
        "<p>&amp;-;</p>",
        "should not support the other characters after `&`"
    );

    assert_eq!(
        to_html("&#-;"),
        "<p>&amp;#-;</p>",
        "should not support the other characters after `#`"
    );

    assert_eq!(
        to_html("&#x-;"),
        "<p>&amp;#x-;</p>",
        "should not support the other characters after `#x`"
    );

    assert_eq!(
        to_html("&lt-;"),
        "<p>&amp;lt-;</p>",
        "should not support the other characters inside a name"
    );

    assert_eq!(
        to_html("&#9-;"),
        "<p>&amp;#9-;</p>",
        "should not support the other characters inside a demical"
    );

    assert_eq!(
        to_html("&#x9-;"),
        "<p>&amp;#x9-;</p>",
        "should not support the other characters inside a hexademical"
    );

    // With the construct disabled, `&amp;` is plain text and its `&` is escaped.
    assert_eq!(
        to_html_with_options(
            "&amp;",
            &Options {
                parse: ParseOptions {
                    constructs: Constructs {
                        character_reference: false,
                        ..Default::default()
                    },
                    ..Default::default()
                },
                ..Default::default()
            }
        )?,
        "<p>&amp;amp;</p>",
        "should support turning off character references"
    );

    // The input is 5 lines / 158 bytes and its last line is 22 columns wide,
    // which is what the expected end position (line 5, column 23, offset 158)
    // encodes.
    assert_eq!(
        to_mdast(
            "&nbsp; &amp; &copy; &AElig; &Dcaron;\n&frac34; &HilbertSpace; &DifferentialD;\n&ClockwiseContourIntegral; &ngE;\n&#35; &#1234; &#992; &#0;\n&#X22; &#XD06; &#xcab;",
            &Default::default()
        )?,
        Node::Root(Root {
            children: vec![Node::Paragraph(Paragraph {
                children: vec![Node::Text(Text {
                    value: "\u{a0} & © Æ Ď\n¾ ℋ ⅆ\n∲ ≧̸\n# Ӓ Ϡ �\n\" ആ ಫ".into(),
                    position: Some(Position::new(1, 1, 0, 5, 23, 158))
                }),],
                position: Some(Position::new(1, 1, 0, 5, 23, 158))
            })],
            position: Some(Position::new(1, 1, 0, 5, 23, 158))
        }),
        "should support character references as `Text`s in mdast"
    );

    Ok(())
}