Markdown parser fork with extended syntax for personal use.
at hack 263 lines 9.5 kB view raw
1//! MDX expression occurs in [MDX expression (flow)][mdx_expression_flow] and 2//! [MDX expression (text)][mdx_expression_text]. 3//! 4//! ## Grammar 5//! 6//! MDX expression forms with the following BNF 7//! (<small>see [construct][crate::construct] for character groups</small>): 8//! 9//! ```bnf 10//! mdx_expression ::= '{' *(expression_text | expression) '}' 11//! expression_text ::= char - '{' - '}' 12//! ``` 13//! 14//! ## Tokens 15//! 16//! * [`LineEnding`][Name::LineEnding] 17//! * [`MdxExpressionMarker`][Name::MdxExpressionMarker] 18//! * [`MdxExpressionData`][Name::MdxExpressionData] 19//! 20//! ## Recommendation 21//! 22//! When authoring markdown with JavaScript, keep in mind that MDX is a 23//! whitespace sensitive and line-based language, while JavaScript is 24//! insensitive to whitespace. 25//! This affects how markdown and JavaScript interleave with eachother in MDX. 26//! For more info on how it works, see [§ Interleaving][interleaving] on the 27//! MDX site. 28//! 29//! ## Errors 30//! 31//! ### Unexpected end of file in expression, expected a corresponding closing brace for `{` 32//! 33//! This error occurs if a `{` was seen without a `}`. 34//! For example: 35//! 36//! ```markdown 37//! a { b 38//! ``` 39//! 40//! ### Unexpected lazy line in expression in container, expected line to be prefixed with `>` when in a block quote, whitespace when in a list, etc 41//! 42//! This error occurs if a a lazy line (of a container) is found in an expression. 43//! For example: 44//! 45//! ```markdown 46//! > {a + 47//! b} 48//! ``` 49//! 50//! ## References 51//! 52//! * [`micromark-factory-mdx-expression`](https://github.com/micromark/micromark-extension-mdx-expression/blob/main/packages/micromark-factory-mdx-expression/dev/index.js) 53//! * [`mdxjs.com`](https://mdxjs.com) 54//! 55//! [mdx_expression_flow]: crate::construct::mdx_expression_flow 56//! [mdx_expression_text]: crate::construct::mdx_expression_text 57//! [interleaving]: https://mdxjs.com/docs/what-is-mdx/#interleaving 58 59use crate::event::Name; 60use crate::message; 61use crate::state::{Name as StateName, State}; 62use crate::tokenizer::Tokenizer; 63use crate::util::mdx_collect::collect; 64use crate::{MdxExpressionKind, MdxExpressionParse, MdxSignal}; 65use alloc::boxed::Box; 66 67/// Start of an MDX expression. 68/// 69/// ```markdown 70/// > | a {Math.PI} c 71/// ^ 72/// ``` 73pub fn start(tokenizer: &mut Tokenizer) -> State { 74 debug_assert_eq!(tokenizer.current, Some(b'{')); 75 tokenizer.enter(tokenizer.tokenize_state.token_1.clone()); 76 tokenizer.enter(Name::MdxExpressionMarker); 77 tokenizer.consume(); 78 tokenizer.exit(Name::MdxExpressionMarker); 79 tokenizer.tokenize_state.start = tokenizer.events.len() - 1; 80 State::Next(StateName::MdxExpressionBefore) 81} 82 83/// Before data. 84/// 85/// ```markdown 86/// > | a {Math.PI} c 87/// ^ 88/// ``` 89pub fn before(tokenizer: &mut Tokenizer) -> State { 90 match tokenizer.current { 91 None => { 92 let problem = tokenizer.tokenize_state.mdx_last_parse_error.take() 93 .unwrap_or_else(|| ("Unexpected end of file in expression, expected a corresponding closing brace for `{`".into(), "markdown-rs".into(), "unexpected-eof".into())); 94 95 State::Error(message::Message { 96 place: Some(Box::new(message::Place::Point(tokenizer.point.to_unist()))), 97 reason: problem.0, 98 rule_id: Box::new(problem.2), 99 source: Box::new(problem.1), 100 }) 101 } 102 Some(b'\n') => { 103 tokenizer.enter(Name::LineEnding); 104 tokenizer.consume(); 105 tokenizer.exit(Name::LineEnding); 106 State::Next(StateName::MdxExpressionEolAfter) 107 } 108 Some(b'}') if tokenizer.tokenize_state.size == 0 => { 109 let state = if let Some(ref parse) = tokenizer.parse_state.options.mdx_expression_parse 110 { 111 parse_expression(tokenizer, parse) 112 } else { 113 State::Ok 114 }; 115 116 if state == State::Ok { 117 tokenizer.tokenize_state.start = 0; 118 tokenizer.enter(Name::MdxExpressionMarker); 119 tokenizer.consume(); 120 tokenizer.exit(Name::MdxExpressionMarker); 121 tokenizer.exit(tokenizer.tokenize_state.token_1.clone()); 122 } 123 124 state 125 } 126 Some(_) => { 127 tokenizer.enter(Name::MdxExpressionData); 128 State::Retry(StateName::MdxExpressionInside) 129 } 130 } 131} 132 133/// In data. 134/// 135/// ```markdown 136/// > | a {Math.PI} c 137/// ^ 138/// ``` 139pub fn inside(tokenizer: &mut Tokenizer) -> State { 140 if matches!(tokenizer.current, None | Some(b'\n')) 141 || (tokenizer.current == Some(b'}') && tokenizer.tokenize_state.size == 0) 142 { 143 tokenizer.exit(Name::MdxExpressionData); 144 State::Retry(StateName::MdxExpressionBefore) 145 } else { 146 // Don’t count if gnostic. 147 if tokenizer.current == Some(b'{') 148 && tokenizer.parse_state.options.mdx_expression_parse.is_none() 149 { 150 tokenizer.tokenize_state.size += 1; 151 } else if tokenizer.current == Some(b'}') { 152 tokenizer.tokenize_state.size -= 1; 153 } 154 155 tokenizer.consume(); 156 State::Next(StateName::MdxExpressionInside) 157 } 158} 159 160/// After eol. 161/// 162/// ```markdown 163/// | a {b + 164/// > | c} d 165/// ^ 166/// ``` 167pub fn eol_after(tokenizer: &mut Tokenizer) -> State { 168 // Lazy continuation in a flow expression (or flow tag) is a syntax error. 169 if (tokenizer.tokenize_state.token_1 == Name::MdxFlowExpression 170 || tokenizer.tokenize_state.token_2 == Name::MdxJsxFlowTag) 171 && tokenizer.lazy 172 { 173 State::Error( 174 message::Message { 175 place: Some(Box::new(message::Place::Point(tokenizer.point.to_unist()))), 176 reason: "Unexpected lazy line in expression in container, expected line to be prefixed with `>` when in a block quote, whitespace when in a list, etc".into(), 177 source: Box::new("markdown-rs".into()), 178 rule_id: Box::new("unexpected-lazy".into()), 179 } 180 ) 181 } else if matches!(tokenizer.current, Some(b'\t' | b' ')) { 182 // Idea: investigate if we’d need to use more complex stripping. 183 // Take this example: 184 // 185 // ```markdown 186 // > aaa <b c={` 187 // > d 188 // > `} /> eee 189 // ``` 190 // 191 // Currently, the “paragraph” starts at `> | aaa`, so for the next line 192 // here we split it into `>␠|␠␠|␠␠␠d` (prefix, this indent here, 193 // expression data). 194 tokenizer.enter(Name::LinePrefix); 195 State::Retry(StateName::MdxExpressionPrefix) 196 } else { 197 State::Retry(StateName::MdxExpressionBefore) 198 } 199} 200 201pub fn prefix(tokenizer: &mut Tokenizer) -> State { 202 // Tab-size to eat has to be the same as what we serialize as. 203 // While in some places in markdown that’s 4, in JS it’s more common as 2. 204 // Which is what’s also in `mdast-util-mdx-jsx`: 205 // <https://github.com/syntax-tree/mdast-util-mdx-jsx/blob/40b951b/lib/index.js#L52> 206 // <https://github.com/micromark/micromark-extension-mdx-expression/blob/7c305ff/packages/micromark-factory-mdx-expression/dev/index.js#L37> 207 if matches!(tokenizer.current, Some(b'\t' | b' ')) && tokenizer.tokenize_state.size_c < 2 { 208 tokenizer.tokenize_state.size_c += 1; 209 tokenizer.consume(); 210 return State::Next(StateName::MdxExpressionPrefix); 211 } 212 213 tokenizer.exit(Name::LinePrefix); 214 tokenizer.tokenize_state.size_c = 0; 215 State::Retry(StateName::MdxExpressionBefore) 216} 217 218/// Parse an expression with a given function. 219fn parse_expression(tokenizer: &mut Tokenizer, parse: &MdxExpressionParse) -> State { 220 // Collect the body of the expression and positional info for each run of it. 221 let result = collect( 222 &tokenizer.events, 223 tokenizer.parse_state.bytes, 224 tokenizer.tokenize_state.start, 225 &[Name::MdxExpressionData, Name::LineEnding], 226 &[], 227 ); 228 229 // Turn the name of the expression into a kind. 230 let kind = match tokenizer.tokenize_state.token_1 { 231 Name::MdxFlowExpression | Name::MdxTextExpression => MdxExpressionKind::Expression, 232 Name::MdxJsxTagAttributeExpression => MdxExpressionKind::AttributeExpression, 233 Name::MdxJsxTagAttributeValueExpression => MdxExpressionKind::AttributeValueExpression, 234 _ => unreachable!("cannot handle unknown expression name"), 235 }; 236 237 // Parse and handle what was signaled back. 238 match parse(&result.value, &kind) { 239 MdxSignal::Ok => State::Ok, 240 MdxSignal::Error(reason, relative, source, rule_id) => { 241 let point = tokenizer 242 .parse_state 243 .location 244 .as_ref() 245 .expect("expected location index if aware mdx is on") 246 .relative_to_point(&result.stops, relative) 247 .unwrap_or_else(|| tokenizer.point.to_unist()); 248 249 State::Error(message::Message { 250 place: Some(Box::new(message::Place::Point(point))), 251 reason, 252 rule_id, 253 source, 254 }) 255 } 256 MdxSignal::Eof(reason, source, rule_id) => { 257 tokenizer.tokenize_state.mdx_last_parse_error = Some((reason, *source, *rule_id)); 258 tokenizer.enter(Name::MdxExpressionData); 259 tokenizer.consume(); 260 State::Next(StateName::MdxExpressionInside) 261 } 262 } 263}