Markdown parser fork with extended syntax for personal use.
at hack 290 lines 9.8 kB view raw
1//! Bridge between `markdown-rs` and SWC. 2 3use crate::test_utils::swc_utils::{create_span, RewritePrefixContext}; 4use markdown::{MdxExpressionKind, MdxSignal}; 5use std::rc::Rc; 6use swc_core::common::{ 7 comments::{Comment, SingleThreadedComments, SingleThreadedCommentsMap}, 8 source_map::SmallPos, 9 BytePos, FileName, SourceFile, Span, Spanned, 10}; 11use swc_core::ecma::ast::{EsVersion, Expr, Module, PropOrSpread}; 12use swc_core::ecma::parser::{ 13 error::Error as SwcError, parse_file_as_expr, parse_file_as_module, EsSyntax, Syntax, 14}; 15use swc_core::ecma::visit::VisitMutWith; 16 17/// Lex ESM in MDX with SWC. 18pub fn parse_esm(value: &str) -> MdxSignal { 19 let result = parse_esm_core(value); 20 21 match result { 22 Err((span, message)) => swc_error_to_signal(span, &message, value.len()), 23 Ok(_) => MdxSignal::Ok, 24 } 25} 26 27/// Core to parse ESM. 28fn parse_esm_core(value: &str) -> Result<Module, (Span, String)> { 29 let (file, syntax, version) = create_config(value.into()); 30 let mut errors = vec![]; 31 let result = parse_file_as_module(&file, syntax, version, None, &mut errors); 32 33 match result { 34 Err(error) => Err(( 35 fix_span(error.span(), 1), 36 format!( 37 "Could not parse esm with swc: {}", 38 swc_error_to_string(&error) 39 ), 40 )), 41 Ok(module) => { 42 if errors.is_empty() { 43 let mut index = 0; 44 while index < module.body.len() { 45 let node = &module.body[index]; 46 47 if !node.is_module_decl() { 48 return Err(( 49 fix_span(node.span(), 1), 50 "Unexpected statement in code: only import/exports are supported" 51 .into(), 52 )); 53 } 54 55 index += 1; 56 } 57 58 Ok(module) 59 } else { 60 Err(( 61 fix_span(errors[0].span(), 1), 62 format!( 63 "Could not parse esm with swc: {}", 64 swc_error_to_string(&errors[0]) 65 ), 66 )) 67 } 68 } 69 } 70} 71 72fn parse_expression_core( 73 value: &str, 74 kind: &MdxExpressionKind, 75) -> Result<Option<Box<Expr>>, (Span, String)> { 76 // Empty expressions are OK. 77 if matches!(kind, MdxExpressionKind::Expression) && whitespace_and_comments(0, value).is_ok() { 78 return Ok(None); 79 } 80 81 // For attribute expression, a spread is needed, for which we have to prefix 82 // and suffix the input. 83 // See `check_expression_ast` for how the AST is verified. 84 let (prefix, suffix) = if matches!(kind, MdxExpressionKind::AttributeExpression) { 85 ("({", "})") 86 } else { 87 ("", "") 88 }; 89 90 let (file, syntax, version) = create_config(format!("{}{}{}", prefix, value, suffix)); 91 let mut errors = vec![]; 92 let result = parse_file_as_expr(&file, syntax, version, None, &mut errors); 93 94 match result { 95 Err(error) => Err(( 96 fix_span(error.span(), prefix.len() + 1), 97 format!( 98 "Could not parse expression with swc: {}", 99 swc_error_to_string(&error) 100 ), 101 )), 102 Ok(mut expr) => { 103 if errors.is_empty() { 104 let expression_end = expr.span().hi.to_usize() - 1; 105 if let Err((span, reason)) = whitespace_and_comments(expression_end, value) { 106 return Err((span, reason)); 107 } 108 109 expr.visit_mut_with(&mut RewritePrefixContext { 110 prefix_len: prefix.len() as u32, 111 }); 112 113 if matches!(kind, MdxExpressionKind::AttributeExpression) { 114 let expr_span = expr.span(); 115 116 if let Expr::Paren(d) = *expr { 117 if let Expr::Object(mut obj) = *d.expr { 118 if obj.props.len() > 1 { 119 return Err((obj.span, "Unexpected extra content in spread (such as `{...x,y}`): only a single spread is supported (such as `{...x}`)".into())); 120 } 121 122 if let Some(PropOrSpread::Spread(d)) = obj.props.pop() { 123 return Ok(Some(d.expr)); 124 } 125 } 126 }; 127 128 return Err(( 129 expr_span, 130 "Unexpected prop in spread (such as `{x}`): only a spread is supported (such as `{...x}`)".into(), 131 )); 132 } 133 134 Ok(Some(expr)) 135 } else { 136 Err(( 137 fix_span(errors[0].span(), prefix.len() + 1), 138 format!( 139 "Could not parse expression with swc: {}", 140 swc_error_to_string(&errors[0]) 141 ), 142 )) 143 } 144 } 145 } 146} 147 148/// Lex expressions in MDX with SWC. 149pub fn parse_expression(value: &str, kind: &MdxExpressionKind) -> MdxSignal { 150 let result = parse_expression_core(value, kind); 151 152 match result { 153 Err((span, message)) => swc_error_to_signal(span, &message, value.len()), 154 Ok(_) => MdxSignal::Ok, 155 } 156} 157 158// To do: remove this attribute, use it somewhere. 159#[allow(dead_code)] 160/// Turn SWC comments into a flat vec. 161pub fn flat_comments(single_threaded_comments: SingleThreadedComments) -> Vec<Comment> { 162 let raw_comments = single_threaded_comments.take_all(); 163 let take = |list: SingleThreadedCommentsMap| { 164 Rc::try_unwrap(list) 165 .unwrap() 166 .into_inner() 167 .into_values() 168 .flatten() 169 .collect::<Vec<_>>() 170 }; 171 let mut list = take(raw_comments.0); 172 list.append(&mut take(raw_comments.1)); 173 list 174} 175 176/// Turn an SWC error into an `MdxSignal`. 177/// 178/// * If the error happens at `value_len`, yields `MdxSignal::Eof` 179/// * Else, yields `MdxSignal::Error`. 180fn swc_error_to_signal(span: Span, reason: &str, value_len: usize) -> MdxSignal { 181 let source = Box::new("mdx".into()); 182 let rule_id = Box::new("swc".into()); 183 let error_end = span.hi.to_usize(); 184 185 if error_end >= value_len { 186 MdxSignal::Eof(reason.into(), source, rule_id) 187 } else { 188 MdxSignal::Error(reason.into(), span.lo.to_usize(), source, rule_id) 189 } 190} 191 192/// Turn an SWC error into a string. 193fn swc_error_to_string(error: &SwcError) -> String { 194 error.kind().msg().into() 195} 196 197/// Move past JavaScript whitespace (well, actually ASCII whitespace) and 198/// comments. 199/// 200/// This is needed because for expressions, we use an API that parses up to 201/// a valid expression, but there may be more expressions after it, which we 202/// don’t alow. 203fn whitespace_and_comments(mut index: usize, value: &str) -> Result<(), (Span, String)> { 204 let bytes = value.as_bytes(); 205 let len = bytes.len(); 206 let mut in_multiline = false; 207 let mut in_line = false; 208 209 while index < len { 210 // In a multiline comment: `/* a */`. 211 if in_multiline { 212 if index + 1 < len && bytes[index] == b'*' && bytes[index + 1] == b'/' { 213 index += 1; 214 in_multiline = false; 215 } 216 } 217 // In a line comment: `// a`. 218 else if in_line { 219 if bytes[index] == b'\r' || bytes[index] == b'\n' { 220 in_line = false; 221 } 222 } 223 // Not in a comment, opening a multiline comment: `/* a */`. 224 else if index + 1 < len && bytes[index] == b'/' && bytes[index + 1] == b'*' { 225 index += 1; 226 in_multiline = true; 227 } 228 // Not in a comment, opening a line comment: `// a`. 229 else if index + 1 < len && bytes[index] == b'/' && bytes[index + 1] == b'/' { 230 index += 1; 231 in_line = true; 232 } 233 // Outside comment, whitespace. 234 else if bytes[index].is_ascii_whitespace() { 235 // Fine! 236 } 237 // Outside comment, not whitespace. 238 else { 239 return Err(( 240 create_span(index as u32, value.len() as u32), 241 "Could not parse expression with swc: Unexpected content after expression".into(), 242 )); 243 } 244 245 index += 1; 246 } 247 248 if in_multiline { 249 return Err(( 250 create_span(index as u32, value.len() as u32), "Could not parse expression with swc: Unexpected unclosed multiline comment, expected closing: `*/`".into())); 251 } 252 253 if in_line { 254 // EOF instead of EOL is specifically not allowed, because that would 255 // mean the closing brace is on the commented-out line 256 return Err((create_span(index as u32, value.len() as u32), "Could not parse expression with swc: Unexpected unclosed line comment, expected line ending: `\\n`".into())); 257 } 258 259 Ok(()) 260} 261 262/// Create configuration for SWC, shared between ESM and expressions. 263/// 264/// This enables modern JavaScript (ES2022) + JSX. 265fn create_config(source: String) -> (SourceFile, Syntax, EsVersion) { 266 ( 267 // File. 268 SourceFile::new( 269 FileName::Anon.into(), 270 false, 271 FileName::Anon.into(), 272 source.into(), 273 BytePos::from_usize(1), 274 ), 275 // Syntax. 276 Syntax::Es(EsSyntax { 277 jsx: true, 278 ..EsSyntax::default() 279 }), 280 // Version. 281 // To do: update once in a while (last checked: 2024-04-18). 282 EsVersion::Es2022, 283 ) 284} 285 286fn fix_span(mut span: Span, offset: usize) -> Span { 287 span.lo = BytePos::from_usize(span.lo.to_usize() - offset); 288 span.hi = BytePos::from_usize(span.hi.to_usize() - offset); 289 span 290}