Markdown parser fork with extended syntax for personal use.
at hack 238 lines 6.8 kB view raw
1//! MDX ESM occurs in the [flow][] content type. 2//! 3//! ## Grammar 4//! 5//! MDX expression (flow) forms with the following BNF 6//! (<small>see [construct][crate::construct] for character groups</small>): 7//! 8//! ```bnf 9//! mdx_esm ::= word *line *(eol *line) 10//! 11//! word ::= 'e' 'x' 'p' 'o' 'r' 't' | 'i' 'm' 'p' 'o' 'r' 't' 12//! ``` 13//! 14//! This construct must be followed by a blank line or eof (end of file). 15//! It can include blank lines if [`MdxEsmParse`][crate::MdxEsmParse] passed in 16//! [`ParseOptions`][parse_options] allows it. 17//! 18//! ## Tokens 19//! 20//! * [`LineEnding`][Name::LineEnding] 21//! * [`MdxEsm`][Name::MdxEsm] 22//! * [`MdxEsmData`][Name::MdxEsmData] 23//! 24//! ## References 25//! 26//! * [`syntax.js` in `micromark-extension-mdxjs-esm`](https://github.com/micromark/micromark-extension-mdxjs-esm/blob/main/dev/lib/syntax.js) 27//! * [`mdxjs.com`](https://mdxjs.com) 28//! 29//! [flow]: crate::construct::flow 30//! [parse_options]: crate::ParseOptions 31 32use crate::event::Name; 33use crate::message; 34use crate::state::{Name as StateName, State}; 35use crate::tokenizer::Tokenizer; 36use crate::util::{mdx_collect::collect, slice::Slice}; 37use crate::MdxSignal; 38use alloc::boxed::Box; 39 40/// Start of MDX ESM. 41/// 42/// ```markdown 43/// > | import a from 'b' 44/// ^ 45/// ``` 46pub fn start(tokenizer: &mut Tokenizer) -> State { 47 // If it’s turned on. 48 if tokenizer.parse_state.options.constructs.mdx_esm 49 // If there is a gnostic parser. 50 && tokenizer.parse_state.options.mdx_esm_parse.is_some() 51 // When not interrupting. 52 && !tokenizer.interrupt 53 // Only at the start of a line, not at whitespace or in a container. 54 && tokenizer.point.column == 1 55 && matches!(tokenizer.current, Some(b'e' | b'i')) 56 { 57 // Place where keyword starts. 58 tokenizer.tokenize_state.start = tokenizer.point.index; 59 tokenizer.enter(Name::MdxEsm); 60 tokenizer.enter(Name::MdxEsmData); 61 tokenizer.consume(); 62 State::Next(StateName::MdxEsmWord) 63 } else { 64 State::Nok 65 } 66} 67 68/// In keyword. 69/// 70/// ```markdown 71/// > | import a from 'b' 72/// ^^^^^^ 73/// ``` 74pub fn word(tokenizer: &mut Tokenizer) -> State { 75 if matches!(tokenizer.current, Some(b'a'..=b'z')) { 76 tokenizer.consume(); 77 State::Next(StateName::MdxEsmWord) 78 } else { 79 let slice = Slice::from_indices( 80 tokenizer.parse_state.bytes, 81 tokenizer.tokenize_state.start, 82 tokenizer.point.index, 83 ); 84 85 if matches!(slice.as_str(), "export" | "import") && tokenizer.current == Some(b' ') { 86 tokenizer.concrete = true; 87 tokenizer.tokenize_state.start = tokenizer.events.len() - 1; 88 tokenizer.consume(); 89 State::Next(StateName::MdxEsmInside) 90 } else { 91 tokenizer.tokenize_state.start = 0; 92 State::Nok 93 } 94 } 95} 96 97/// In data. 98/// 99/// ```markdown 100/// > | import a from 'b' 101/// ^ 102/// ``` 103pub fn inside(tokenizer: &mut Tokenizer) -> State { 104 match tokenizer.current { 105 None | Some(b'\n') => { 106 tokenizer.exit(Name::MdxEsmData); 107 State::Retry(StateName::MdxEsmLineStart) 108 } 109 _ => { 110 tokenizer.consume(); 111 State::Next(StateName::MdxEsmInside) 112 } 113 } 114} 115 116/// At start of line. 117/// 118/// ```markdown 119/// | import a from 'b' 120/// > | export {a} 121/// ^ 122/// ``` 123pub fn line_start(tokenizer: &mut Tokenizer) -> State { 124 match tokenizer.current { 125 None => State::Retry(StateName::MdxEsmAtEnd), 126 Some(b'\n') => { 127 tokenizer.check( 128 State::Next(StateName::MdxEsmAtEnd), 129 State::Next(StateName::MdxEsmContinuationStart), 130 ); 131 State::Retry(StateName::MdxEsmBlankLineBefore) 132 } 133 _ => { 134 tokenizer.enter(Name::MdxEsmData); 135 tokenizer.consume(); 136 State::Next(StateName::MdxEsmInside) 137 } 138 } 139} 140 141/// At start of line that continues. 142/// 143/// ```markdown 144/// | import a from 'b' 145/// > | export {a} 146/// ^ 147/// ``` 148pub fn continuation_start(tokenizer: &mut Tokenizer) -> State { 149 tokenizer.enter(Name::LineEnding); 150 tokenizer.consume(); 151 tokenizer.exit(Name::LineEnding); 152 State::Next(StateName::MdxEsmLineStart) 153} 154 155/// At start of a potentially blank line. 156/// 157/// ```markdown 158/// | import a from 'b' 159/// > | export {a} 160/// ^ 161/// ``` 162pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State { 163 tokenizer.enter(Name::LineEnding); 164 tokenizer.consume(); 165 tokenizer.exit(Name::LineEnding); 166 State::Next(StateName::BlankLineStart) 167} 168 169/// At end of line (blank or eof). 170/// 171/// ```markdown 172/// > | import a from 'b' 173/// ^ 174/// ``` 175pub fn at_end(tokenizer: &mut Tokenizer) -> State { 176 let result = parse_esm(tokenizer); 177 178 // Done!. 179 if matches!(result, State::Ok) { 180 tokenizer.concrete = false; 181 tokenizer.exit(Name::MdxEsm); 182 } 183 184 result 185} 186 187/// Parse ESM with a given function. 188fn parse_esm(tokenizer: &mut Tokenizer) -> State { 189 // We can `unwrap` because we don’t parse if this is `None`. 190 let parse = tokenizer 191 .parse_state 192 .options 193 .mdx_esm_parse 194 .as_ref() 195 .unwrap(); 196 197 // Collect the body of the ESM and positional info for each run of it. 198 let result = collect( 199 &tokenizer.events, 200 tokenizer.parse_state.bytes, 201 tokenizer.tokenize_state.start, 202 &[Name::MdxEsmData, Name::LineEnding], 203 &[], 204 ); 205 206 // Parse and handle what was signaled back. 207 match parse(&result.value) { 208 MdxSignal::Ok => State::Ok, 209 MdxSignal::Error(message, relative, source, rule_id) => { 210 let point = tokenizer 211 .parse_state 212 .location 213 .as_ref() 214 .expect("expected location index if aware mdx is on") 215 .relative_to_point(&result.stops, relative) 216 .expect("expected non-empty string"); 217 State::Error(message::Message { 218 place: Some(Box::new(message::Place::Point(point))), 219 reason: message, 220 source, 221 rule_id, 222 }) 223 } 224 MdxSignal::Eof(message, source, rule_id) => { 225 if tokenizer.current.is_none() { 226 State::Error(message::Message { 227 place: Some(Box::new(message::Place::Point(tokenizer.point.to_unist()))), 228 reason: message, 229 source, 230 rule_id, 231 }) 232 } else { 233 tokenizer.tokenize_state.mdx_last_parse_error = Some((message, *source, *rule_id)); 234 State::Retry(StateName::MdxEsmContinuationStart) 235 } 236 } 237 } 238}