Markdown parser fork with extended syntax for personal use.
1//! MDX expression occurs in [MDX expression (flow)][mdx_expression_flow] and
2//! [MDX expression (text)][mdx_expression_text].
3//!
4//! ## Grammar
5//!
6//! MDX expression forms with the following BNF
7//! (<small>see [construct][crate::construct] for character groups</small>):
8//!
9//! ```bnf
10//! mdx_expression ::= '{' *(expression_text | expression) '}'
11//! expression_text ::= char - '{' - '}'
12//! ```
13//!
14//! ## Tokens
15//!
16//! * [`LineEnding`][Name::LineEnding]
17//! * [`MdxExpressionMarker`][Name::MdxExpressionMarker]
18//! * [`MdxExpressionData`][Name::MdxExpressionData]
19//!
20//! ## Recommendation
21//!
22//! When authoring markdown with JavaScript, keep in mind that MDX is a
23//! whitespace sensitive and line-based language, while JavaScript is
24//! insensitive to whitespace.
25//! This affects how markdown and JavaScript interleave with each other in MDX.
26//! For more info on how it works, see [§ Interleaving][interleaving] on the
27//! MDX site.
28//!
29//! ## Errors
30//!
31//! ### Unexpected end of file in expression, expected a corresponding closing brace for `{`
32//!
33//! This error occurs if a `{` was seen without a `}`.
34//! For example:
35//!
36//! ```markdown
37//! a { b
38//! ```
39//!
40//! ### Unexpected lazy line in expression in container, expected line to be prefixed with `>` when in a block quote, whitespace when in a list, etc
41//!
42//! This error occurs if a lazy line (of a container) is found in an expression.
43//! For example:
44//!
45//! ```markdown
46//! > {a +
47//! b}
48//! ```
49//!
50//! ## References
51//!
52//! * [`micromark-factory-mdx-expression`](https://github.com/micromark/micromark-extension-mdx-expression/blob/main/packages/micromark-factory-mdx-expression/dev/index.js)
53//! * [`mdxjs.com`](https://mdxjs.com)
54//!
55//! [mdx_expression_flow]: crate::construct::mdx_expression_flow
56//! [mdx_expression_text]: crate::construct::mdx_expression_text
57//! [interleaving]: https://mdxjs.com/docs/what-is-mdx/#interleaving
58
59use crate::event::Name;
60use crate::message;
61use crate::state::{Name as StateName, State};
62use crate::tokenizer::Tokenizer;
63use crate::util::mdx_collect::collect;
64use crate::{MdxExpressionKind, MdxExpressionParse, MdxSignal};
65use alloc::boxed::Box;
66
67/// Start of an MDX expression.
68///
69/// ```markdown
70/// > | a {Math.PI} c
71/// ^
72/// ```
73pub fn start(tokenizer: &mut Tokenizer) -> State {
74 debug_assert_eq!(tokenizer.current, Some(b'{'));
75 tokenizer.enter(tokenizer.tokenize_state.token_1.clone());
76 tokenizer.enter(Name::MdxExpressionMarker);
77 tokenizer.consume();
78 tokenizer.exit(Name::MdxExpressionMarker);
79 tokenizer.tokenize_state.start = tokenizer.events.len() - 1;
80 State::Next(StateName::MdxExpressionBefore)
81}
82
83/// Before data.
84///
85/// ```markdown
86/// > | a {Math.PI} c
87/// ^
88/// ```
89pub fn before(tokenizer: &mut Tokenizer) -> State {
90 match tokenizer.current {
91 None => {
92 let problem = tokenizer.tokenize_state.mdx_last_parse_error.take()
93 .unwrap_or_else(|| ("Unexpected end of file in expression, expected a corresponding closing brace for `{`".into(), "markdown-rs".into(), "unexpected-eof".into()));
94
95 State::Error(message::Message {
96 place: Some(Box::new(message::Place::Point(tokenizer.point.to_unist()))),
97 reason: problem.0,
98 rule_id: Box::new(problem.2),
99 source: Box::new(problem.1),
100 })
101 }
102 Some(b'\n') => {
103 tokenizer.enter(Name::LineEnding);
104 tokenizer.consume();
105 tokenizer.exit(Name::LineEnding);
106 State::Next(StateName::MdxExpressionEolAfter)
107 }
108 Some(b'}') if tokenizer.tokenize_state.size == 0 => {
109 let state = if let Some(ref parse) = tokenizer.parse_state.options.mdx_expression_parse
110 {
111 parse_expression(tokenizer, parse)
112 } else {
113 State::Ok
114 };
115
116 if state == State::Ok {
117 tokenizer.tokenize_state.start = 0;
118 tokenizer.enter(Name::MdxExpressionMarker);
119 tokenizer.consume();
120 tokenizer.exit(Name::MdxExpressionMarker);
121 tokenizer.exit(tokenizer.tokenize_state.token_1.clone());
122 }
123
124 state
125 }
126 Some(_) => {
127 tokenizer.enter(Name::MdxExpressionData);
128 State::Retry(StateName::MdxExpressionInside)
129 }
130 }
131}
132
133/// In data.
134///
135/// ```markdown
136/// > | a {Math.PI} c
137/// ^
138/// ```
139pub fn inside(tokenizer: &mut Tokenizer) -> State {
140 if matches!(tokenizer.current, None | Some(b'\n'))
141 || (tokenizer.current == Some(b'}') && tokenizer.tokenize_state.size == 0)
142 {
143 tokenizer.exit(Name::MdxExpressionData);
144 State::Retry(StateName::MdxExpressionBefore)
145 } else {
146 // Don’t count if gnostic.
147 if tokenizer.current == Some(b'{')
148 && tokenizer.parse_state.options.mdx_expression_parse.is_none()
149 {
150 tokenizer.tokenize_state.size += 1;
151 } else if tokenizer.current == Some(b'}') {
152 tokenizer.tokenize_state.size -= 1;
153 }
154
155 tokenizer.consume();
156 State::Next(StateName::MdxExpressionInside)
157 }
158}
159
160/// After eol.
161///
162/// ```markdown
163/// | a {b +
164/// > | c} d
165/// ^
166/// ```
167pub fn eol_after(tokenizer: &mut Tokenizer) -> State {
168 // Lazy continuation in a flow expression (or flow tag) is a syntax error.
169 if (tokenizer.tokenize_state.token_1 == Name::MdxFlowExpression
170 || tokenizer.tokenize_state.token_2 == Name::MdxJsxFlowTag)
171 && tokenizer.lazy
172 {
173 State::Error(
174 message::Message {
175 place: Some(Box::new(message::Place::Point(tokenizer.point.to_unist()))),
176 reason: "Unexpected lazy line in expression in container, expected line to be prefixed with `>` when in a block quote, whitespace when in a list, etc".into(),
177 source: Box::new("markdown-rs".into()),
178 rule_id: Box::new("unexpected-lazy".into()),
179 }
180 )
181 } else if matches!(tokenizer.current, Some(b'\t' | b' ')) {
182 // Idea: investigate if we’d need to use more complex stripping.
183 // Take this example:
184 //
185 // ```markdown
186 // > aaa <b c={`
187 // > d
188 // > `} /> eee
189 // ```
190 //
191 // Currently, the “paragraph” starts at `> | aaa`, so for the next line
192 // here we split it into `>␠|␠␠|␠␠␠d` (prefix, this indent here,
193 // expression data).
194 tokenizer.enter(Name::LinePrefix);
195 State::Retry(StateName::MdxExpressionPrefix)
196 } else {
197 State::Retry(StateName::MdxExpressionBefore)
198 }
199}
200
201pub fn prefix(tokenizer: &mut Tokenizer) -> State {
202 // Tab-size to eat has to be the same as what we serialize as.
203 // While in some places in markdown that’s 4, in JS it’s more common as 2.
204 // Which is what’s also in `mdast-util-mdx-jsx`:
205 // <https://github.com/syntax-tree/mdast-util-mdx-jsx/blob/40b951b/lib/index.js#L52>
206 // <https://github.com/micromark/micromark-extension-mdx-expression/blob/7c305ff/packages/micromark-factory-mdx-expression/dev/index.js#L37>
207 if matches!(tokenizer.current, Some(b'\t' | b' ')) && tokenizer.tokenize_state.size_c < 2 {
208 tokenizer.tokenize_state.size_c += 1;
209 tokenizer.consume();
210 return State::Next(StateName::MdxExpressionPrefix);
211 }
212
213 tokenizer.exit(Name::LinePrefix);
214 tokenizer.tokenize_state.size_c = 0;
215 State::Retry(StateName::MdxExpressionBefore)
216}
217
218/// Parse an expression with a given function.
219fn parse_expression(tokenizer: &mut Tokenizer, parse: &MdxExpressionParse) -> State {
220 // Collect the body of the expression and positional info for each run of it.
221 let result = collect(
222 &tokenizer.events,
223 tokenizer.parse_state.bytes,
224 tokenizer.tokenize_state.start,
225 &[Name::MdxExpressionData, Name::LineEnding],
226 &[],
227 );
228
229 // Turn the name of the expression into a kind.
230 let kind = match tokenizer.tokenize_state.token_1 {
231 Name::MdxFlowExpression | Name::MdxTextExpression => MdxExpressionKind::Expression,
232 Name::MdxJsxTagAttributeExpression => MdxExpressionKind::AttributeExpression,
233 Name::MdxJsxTagAttributeValueExpression => MdxExpressionKind::AttributeValueExpression,
234 _ => unreachable!("cannot handle unknown expression name"),
235 };
236
237 // Parse and handle what was signaled back.
238 match parse(&result.value, &kind) {
239 MdxSignal::Ok => State::Ok,
240 MdxSignal::Error(reason, relative, source, rule_id) => {
241 let point = tokenizer
242 .parse_state
243 .location
244 .as_ref()
245 .expect("expected location index if aware mdx is on")
246 .relative_to_point(&result.stops, relative)
247 .unwrap_or_else(|| tokenizer.point.to_unist());
248
249 State::Error(message::Message {
250 place: Some(Box::new(message::Place::Point(point))),
251 reason,
252 rule_id,
253 source,
254 })
255 }
256 MdxSignal::Eof(reason, source, rule_id) => {
257 tokenizer.tokenize_state.mdx_last_parse_error = Some((reason, *source, *rule_id));
258 tokenizer.enter(Name::MdxExpressionData);
259 tokenizer.consume();
260 State::Next(StateName::MdxExpressionInside)
261 }
262 }
263}