Markdown parser fork with extended syntax for personal use.
1//! Bridge between `markdown-rs` and SWC.
2
3use crate::test_utils::swc_utils::{create_span, RewritePrefixContext};
4use markdown::{MdxExpressionKind, MdxSignal};
5use std::rc::Rc;
6use swc_core::common::{
7 comments::{Comment, SingleThreadedComments, SingleThreadedCommentsMap},
8 source_map::SmallPos,
9 BytePos, FileName, SourceFile, Span, Spanned,
10};
11use swc_core::ecma::ast::{EsVersion, Expr, Module, PropOrSpread};
12use swc_core::ecma::parser::{
13 error::Error as SwcError, parse_file_as_expr, parse_file_as_module, EsSyntax, Syntax,
14};
15use swc_core::ecma::visit::VisitMutWith;
16
17/// Lex ESM in MDX with SWC.
18pub fn parse_esm(value: &str) -> MdxSignal {
19 let result = parse_esm_core(value);
20
21 match result {
22 Err((span, message)) => swc_error_to_signal(span, &message, value.len()),
23 Ok(_) => MdxSignal::Ok,
24 }
25}
26
27/// Core to parse ESM.
28fn parse_esm_core(value: &str) -> Result<Module, (Span, String)> {
29 let (file, syntax, version) = create_config(value.into());
30 let mut errors = vec![];
31 let result = parse_file_as_module(&file, syntax, version, None, &mut errors);
32
33 match result {
34 Err(error) => Err((
35 fix_span(error.span(), 1),
36 format!(
37 "Could not parse esm with swc: {}",
38 swc_error_to_string(&error)
39 ),
40 )),
41 Ok(module) => {
42 if errors.is_empty() {
43 let mut index = 0;
44 while index < module.body.len() {
45 let node = &module.body[index];
46
47 if !node.is_module_decl() {
48 return Err((
49 fix_span(node.span(), 1),
50 "Unexpected statement in code: only import/exports are supported"
51 .into(),
52 ));
53 }
54
55 index += 1;
56 }
57
58 Ok(module)
59 } else {
60 Err((
61 fix_span(errors[0].span(), 1),
62 format!(
63 "Could not parse esm with swc: {}",
64 swc_error_to_string(&errors[0])
65 ),
66 ))
67 }
68 }
69 }
70}
71
72fn parse_expression_core(
73 value: &str,
74 kind: &MdxExpressionKind,
75) -> Result<Option<Box<Expr>>, (Span, String)> {
76 // Empty expressions are OK.
77 if matches!(kind, MdxExpressionKind::Expression) && whitespace_and_comments(0, value).is_ok() {
78 return Ok(None);
79 }
80
81 // For attribute expression, a spread is needed, for which we have to prefix
82 // and suffix the input.
83 // See `check_expression_ast` for how the AST is verified.
84 let (prefix, suffix) = if matches!(kind, MdxExpressionKind::AttributeExpression) {
85 ("({", "})")
86 } else {
87 ("", "")
88 };
89
90 let (file, syntax, version) = create_config(format!("{}{}{}", prefix, value, suffix));
91 let mut errors = vec![];
92 let result = parse_file_as_expr(&file, syntax, version, None, &mut errors);
93
94 match result {
95 Err(error) => Err((
96 fix_span(error.span(), prefix.len() + 1),
97 format!(
98 "Could not parse expression with swc: {}",
99 swc_error_to_string(&error)
100 ),
101 )),
102 Ok(mut expr) => {
103 if errors.is_empty() {
104 let expression_end = expr.span().hi.to_usize() - 1;
105 if let Err((span, reason)) = whitespace_and_comments(expression_end, value) {
106 return Err((span, reason));
107 }
108
109 expr.visit_mut_with(&mut RewritePrefixContext {
110 prefix_len: prefix.len() as u32,
111 });
112
113 if matches!(kind, MdxExpressionKind::AttributeExpression) {
114 let expr_span = expr.span();
115
116 if let Expr::Paren(d) = *expr {
117 if let Expr::Object(mut obj) = *d.expr {
118 if obj.props.len() > 1 {
119 return Err((obj.span, "Unexpected extra content in spread (such as `{...x,y}`): only a single spread is supported (such as `{...x}`)".into()));
120 }
121
122 if let Some(PropOrSpread::Spread(d)) = obj.props.pop() {
123 return Ok(Some(d.expr));
124 }
125 }
126 };
127
128 return Err((
129 expr_span,
130 "Unexpected prop in spread (such as `{x}`): only a spread is supported (such as `{...x}`)".into(),
131 ));
132 }
133
134 Ok(Some(expr))
135 } else {
136 Err((
137 fix_span(errors[0].span(), prefix.len() + 1),
138 format!(
139 "Could not parse expression with swc: {}",
140 swc_error_to_string(&errors[0])
141 ),
142 ))
143 }
144 }
145 }
146}
147
148/// Lex expressions in MDX with SWC.
149pub fn parse_expression(value: &str, kind: &MdxExpressionKind) -> MdxSignal {
150 let result = parse_expression_core(value, kind);
151
152 match result {
153 Err((span, message)) => swc_error_to_signal(span, &message, value.len()),
154 Ok(_) => MdxSignal::Ok,
155 }
156}
157
158// To do: remove this attribute, use it somewhere.
159#[allow(dead_code)]
160/// Turn SWC comments into a flat vec.
161pub fn flat_comments(single_threaded_comments: SingleThreadedComments) -> Vec<Comment> {
162 let raw_comments = single_threaded_comments.take_all();
163 let take = |list: SingleThreadedCommentsMap| {
164 Rc::try_unwrap(list)
165 .unwrap()
166 .into_inner()
167 .into_values()
168 .flatten()
169 .collect::<Vec<_>>()
170 };
171 let mut list = take(raw_comments.0);
172 list.append(&mut take(raw_comments.1));
173 list
174}
175
176/// Turn an SWC error into an `MdxSignal`.
177///
178/// * If the error happens at `value_len`, yields `MdxSignal::Eof`
179/// * Else, yields `MdxSignal::Error`.
180fn swc_error_to_signal(span: Span, reason: &str, value_len: usize) -> MdxSignal {
181 let source = Box::new("mdx".into());
182 let rule_id = Box::new("swc".into());
183 let error_end = span.hi.to_usize();
184
185 if error_end >= value_len {
186 MdxSignal::Eof(reason.into(), source, rule_id)
187 } else {
188 MdxSignal::Error(reason.into(), span.lo.to_usize(), source, rule_id)
189 }
190}
191
192/// Turn an SWC error into a string.
193fn swc_error_to_string(error: &SwcError) -> String {
194 error.kind().msg().into()
195}
196
197/// Move past JavaScript whitespace (well, actually ASCII whitespace) and
198/// comments.
199///
200/// This is needed because for expressions, we use an API that parses up to
201/// a valid expression, but there may be more expressions after it, which we
202/// don’t alow.
203fn whitespace_and_comments(mut index: usize, value: &str) -> Result<(), (Span, String)> {
204 let bytes = value.as_bytes();
205 let len = bytes.len();
206 let mut in_multiline = false;
207 let mut in_line = false;
208
209 while index < len {
210 // In a multiline comment: `/* a */`.
211 if in_multiline {
212 if index + 1 < len && bytes[index] == b'*' && bytes[index + 1] == b'/' {
213 index += 1;
214 in_multiline = false;
215 }
216 }
217 // In a line comment: `// a`.
218 else if in_line {
219 if bytes[index] == b'\r' || bytes[index] == b'\n' {
220 in_line = false;
221 }
222 }
223 // Not in a comment, opening a multiline comment: `/* a */`.
224 else if index + 1 < len && bytes[index] == b'/' && bytes[index + 1] == b'*' {
225 index += 1;
226 in_multiline = true;
227 }
228 // Not in a comment, opening a line comment: `// a`.
229 else if index + 1 < len && bytes[index] == b'/' && bytes[index + 1] == b'/' {
230 index += 1;
231 in_line = true;
232 }
233 // Outside comment, whitespace.
234 else if bytes[index].is_ascii_whitespace() {
235 // Fine!
236 }
237 // Outside comment, not whitespace.
238 else {
239 return Err((
240 create_span(index as u32, value.len() as u32),
241 "Could not parse expression with swc: Unexpected content after expression".into(),
242 ));
243 }
244
245 index += 1;
246 }
247
248 if in_multiline {
249 return Err((
250 create_span(index as u32, value.len() as u32), "Could not parse expression with swc: Unexpected unclosed multiline comment, expected closing: `*/`".into()));
251 }
252
253 if in_line {
254 // EOF instead of EOL is specifically not allowed, because that would
255 // mean the closing brace is on the commented-out line
256 return Err((create_span(index as u32, value.len() as u32), "Could not parse expression with swc: Unexpected unclosed line comment, expected line ending: `\\n`".into()));
257 }
258
259 Ok(())
260}
261
262/// Create configuration for SWC, shared between ESM and expressions.
263///
264/// This enables modern JavaScript (ES2022) + JSX.
265fn create_config(source: String) -> (SourceFile, Syntax, EsVersion) {
266 (
267 // File.
268 SourceFile::new(
269 FileName::Anon.into(),
270 false,
271 FileName::Anon.into(),
272 source.into(),
273 BytePos::from_usize(1),
274 ),
275 // Syntax.
276 Syntax::Es(EsSyntax {
277 jsx: true,
278 ..EsSyntax::default()
279 }),
280 // Version.
281 // To do: update once in a while (last checked: 2024-04-18).
282 EsVersion::Es2022,
283 )
284}
285
286fn fix_span(mut span: Span, offset: usize) -> Span {
287 span.lo = BytePos::from_usize(span.lo.to_usize() - offset);
288 span.hi = BytePos::from_usize(span.hi.to_usize() - offset);
289 span
290}