Markdown parser fork with extended syntax for personal use.

Fix expression indent; sync tests with micromark

+242 -26
+7 -11
src/construct/partial_mdx_expression.rs
··· 64 64 use crate::{MdxExpressionKind, MdxExpressionParse, MdxSignal}; 65 65 use alloc::boxed::Box; 66 66 67 - // Tab-size to eat has to be the same as what we serialize as. 68 - // While in some places in markdown that’s 4, in JS it’s more common as 2. 69 - // Which is what’s also in `mdast-util-mdx-jsx`: 70 - // <https://github.com/syntax-tree/mdast-util-mdx-jsx/blob/40b951b/lib/index.js#L52> 71 - // <https://github.com/micromark/micromark-extension-mdx-expression/blob/7c305ff/packages/micromark-factory-mdx-expression/dev/index.js#L37> 72 - pub const INDENT_SIZE: usize = 2; 73 - 74 67 /// Start of an MDX expression. 75 68 /// 76 69 /// ```markdown ··· 206 199 } 207 200 208 201 pub fn prefix(tokenizer: &mut Tokenizer) -> State { 209 - tokenizer.tokenize_state.size_c += 1; 210 - if matches!(tokenizer.current, Some(b'\t' | b' ')) 211 - && tokenizer.tokenize_state.size_c < INDENT_SIZE - 1 212 - { 202 + // Tab-size to eat has to be the same as what we serialize as. 203 + // While in some places in markdown that’s 4, in JS it’s more common as 2. 204 + // Which is what’s also in `mdast-util-mdx-jsx`: 205 + // <https://github.com/syntax-tree/mdast-util-mdx-jsx/blob/40b951b/lib/index.js#L52> 206 + // <https://github.com/micromark/micromark-extension-mdx-expression/blob/7c305ff/packages/micromark-factory-mdx-expression/dev/index.js#L37> 207 + if matches!(tokenizer.current, Some(b'\t' | b' ')) && tokenizer.tokenize_state.size_c < 2 { 208 + tokenizer.tokenize_state.size_c += 1; 213 209 tokenizer.consume(); 214 210 return State::Next(StateName::MdxExpressionPrefix); 215 211 }
+180 -10
tests/mdx_expression_flow.rs
··· 1 1 mod test_utils; 2 2 use markdown::{ 3 - mdast::{MdxFlowExpression, Node, Root}, 3 + mdast::{ 4 + AttributeContent, AttributeValue, AttributeValueExpression, Blockquote, MdxFlowExpression, 5 + MdxJsxAttribute, MdxJsxTextElement, MdxTextExpression, Node, Paragraph, Root, Text, 6 + }, 4 7 message, to_html_with_options, to_mdast, 5 8 unist::Position, 6 9 Constructs, Options, ParseOptions, ··· 8 11 use pretty_assertions::assert_eq; 9 12 use test_utils::swc::{parse_esm, parse_expression}; 10 13 14 + /// Note: these tests are also in `micromark/micromark-extension-mdx-expression` 15 + /// at `tests/index.js`. 11 16 #[test] 12 17 fn mdx_expression_flow_agnostic() -> Result<(), message::Message> { 13 18 let mdx = Options { ··· 100 105 assert_eq!( 101 106 to_html_with_options("a\n\n* b", &mdx)?, 102 107 "<p>a</p>\n<ul>\n<li>b</li>\n</ul>", 103 - "should support lists after non-expressions (GH-11)" 108 + "should support lists after non-expressions (wooorm/markdown-rs#11)" 104 109 ); 105 110 106 111 assert_eq!( ··· 160 165 Ok(()) 161 166 } 162 167 168 + /// Note: these tests are also in `micromark/micromark-extension-mdx-expression` 169 + /// at `tests/index.js`. 
163 170 #[test] 164 171 fn mdx_expression_flow_gnostic() -> Result<(), message::Message> { 165 172 let swc = Options { ··· 235 242 "should support expressions padded w/ parens and comments" 236 243 ); 237 244 245 + assert_eq!( 246 + to_mdast("{`\n\t`}", &swc.parse)?, 247 + Node::Root(Root { 248 + children: vec![Node::MdxFlowExpression(MdxFlowExpression { 249 + value: "`\n `".into(), 250 + position: Some(Position::new(1, 1, 0, 2, 7, 6)), 251 + stops: vec![(0, 1), (1, 2), (2, 3)] 252 + })], 253 + position: Some(Position::new(1, 1, 0, 2, 7, 6)) 254 + }), 255 + "should use correct positional info when tabs are used (1, indent)" 256 + ); 257 + 258 + assert_eq!( 259 + to_mdast("{`\nalpha\t`}", &swc.parse)?, 260 + Node::Root(Root { 261 + children: vec![Node::MdxFlowExpression(MdxFlowExpression { 262 + value: "`\nalpha\t`".into(), 263 + position: Some(Position::new(1, 1, 0, 2, 11, 11)), 264 + stops: vec![(0, 1), (1, 2), (2, 3)] 265 + })], 266 + position: Some(Position::new(1, 1, 0, 2, 11, 11)) 267 + }), 268 + "should use correct positional info when tabs are used (2, content)" 269 + ); 270 + 271 + assert_eq!( 272 + to_mdast("> aaa <b c={`\n> d\n> `} /> eee", &swc.parse)?, 273 + Node::Root(Root { 274 + children: vec![Node::Blockquote(Blockquote { 275 + children: vec![Node::Paragraph(Paragraph { 276 + children: vec![ 277 + Node::Text(Text { 278 + value: "aaa ".into(), 279 + position: Some(Position::new(1, 4, 3, 1, 8, 7)) 280 + }), 281 + Node::MdxJsxTextElement(MdxJsxTextElement { 282 + children: vec![], 283 + name: Some("b".into()), 284 + attributes: vec![AttributeContent::Property(MdxJsxAttribute { 285 + name: "c".into(), 286 + value: Some(AttributeValue::Expression(AttributeValueExpression { 287 + value: "`\n d\n`".into(), 288 + stops: vec![(0, 13), (1, 14), (2, 19), (6, 23), (7, 27)] 289 + })) 290 + })], 291 + position: Some(Position::new(1, 8, 7, 3, 9, 32)) 292 + }), 293 + Node::Text(Text { 294 + value: " eee".into(), 295 + position: Some(Position::new(3, 9, 32, 3, 13, 
36)) 296 + }) 297 + ], 298 + position: Some(Position::new(1, 3, 2, 3, 13, 36)) 299 + })], 300 + position: Some(Position::new(1, 1, 0, 3, 13, 36)) 301 + })], 302 + position: Some(Position::new(1, 1, 0, 3, 13, 36)) 303 + }), 304 + "should support template strings in JSX (text) in block quotes" 305 + ); 306 + 307 + assert_eq!( 308 + to_mdast("> ab {`\n>\t`}", &swc.parse)?, 309 + Node::Root(Root { 310 + children: vec![Node::Blockquote(Blockquote { 311 + children: vec![Node::Paragraph(Paragraph { 312 + children: vec![ 313 + Node::Text(Text { 314 + value: "ab ".into(), 315 + position: Some(Position::new(1, 3, 2, 1, 6, 5)) 316 + }), 317 + Node::MdxTextExpression(MdxTextExpression { 318 + value: "`\n`".into(), 319 + stops: vec![(0, 6), (1, 7), (2, 10)], 320 + position: Some(Position::new(1, 6, 5, 2, 7, 12)) 321 + }) 322 + ], 323 + position: Some(Position::new(1, 3, 2, 2, 7, 12)) 324 + })], 325 + position: Some(Position::new(1, 1, 0, 2, 7, 12)) 326 + })], 327 + position: Some(Position::new(1, 1, 0, 2, 7, 12)) 328 + }), 329 + "should use correct positional when there are virtual spaces due to a block quote" 330 + ); 331 + 332 + assert_eq!( 333 + to_mdast( 334 + "> {`\n> alpha\n> bravo\n> charlie\n> delta\n> `}", 335 + &swc.parse 336 + )?, 337 + Node::Root(Root { 338 + children: vec![Node::Blockquote(Blockquote { 339 + children: vec![Node::MdxFlowExpression(MdxFlowExpression { 340 + value: "`\nalpha\nbravo\ncharlie\n delta\n`".into(), 341 + position: Some(Position::new(1, 3, 2, 6, 5, 49)), 342 + stops: vec![ 343 + (0, 3), 344 + (1, 4), 345 + (2, 7), 346 + (7, 12), 347 + (8, 16), 348 + (13, 21), 349 + (14, 26), 350 + (21, 33), 351 + (22, 38), 352 + (28, 44), 353 + (29, 47) 354 + ] 355 + })], 356 + position: Some(Position::new(1, 1, 0, 6, 5, 49)) 357 + })], 358 + position: Some(Position::new(1, 1, 0, 6, 5, 49)) 359 + }), 360 + "should keep the correct number of spaces in a blockquote (flow)" 361 + ); 362 + 363 + assert_eq!( 364 + to_mdast( 365 + "> {`\n> alpha\n> bravo\n> 
charlie\n> delta\n> `}", 366 + &swc.parse 367 + )?, 368 + Node::Root(Root { 369 + children: vec![Node::Blockquote(Blockquote { 370 + children: vec![Node::MdxFlowExpression(MdxFlowExpression { 371 + value: "`\nalpha\nbravo\ncharlie\n delta\n`".into(), 372 + position: Some(Position::new(1, 3, 2, 6, 5, 49)), 373 + stops: vec![ 374 + (0, 3), 375 + (1, 4), 376 + (2, 7), 377 + (7, 12), 378 + (8, 16), 379 + (13, 21), 380 + (14, 26), 381 + (21, 33), 382 + (22, 38), 383 + (28, 44), 384 + (29, 47) 385 + ] 386 + })], 387 + position: Some(Position::new(1, 1, 0, 6, 5, 49)) 388 + })], 389 + position: Some(Position::new(1, 1, 0, 6, 5, 49)) 390 + }), 391 + "should keep the correct number of spaces in a blockquote (flow)" 392 + ); 393 + 394 + // Note: the weird character test has to go in mdxjs-rs. 395 + 238 396 Ok(()) 239 397 } 240 398 399 + /// Note: these tests are also in `micromark/micromark-extension-mdx-expression` 400 + /// at `tests/index.js`. 401 + /// This project includes *all* extensions which means that it can use JSX. 402 + /// There we test something that does not exist in actual MDX but which is used 403 + /// by the separate JSX extension. 
241 404 #[test] 242 405 fn mdx_expression_spread() -> Result<(), message::Message> { 243 406 let swc = Options { ··· 253 416 assert_eq!( 254 417 to_html_with_options("<a {...b} />", &swc)?, 255 418 "", 256 - "should support spreads for attribute expression" 419 + "should support a spread" 257 420 ); 258 421 259 422 assert_eq!( ··· 272 435 ); 273 436 274 437 assert_eq!( 438 + to_html_with_options("<a {b=c}={} d>", &swc).err().unwrap().to_string(), 439 + "1:5: Unexpected prop in spread (such as `{x}`): only a spread is supported (such as `{...x}`) (mdx:swc)", 440 + "should crash on an incorrect spread that looks like an assignment" 441 + ); 442 + 443 + assert_eq!( 275 444 to_html_with_options("<a {...b,c} d>", &swc).err().unwrap().to_string(), 276 445 "1:5: Unexpected extra content in spread (such as `{...x,y}`): only a single spread is supported (such as `{...x}`) (mdx:swc)", 277 446 "should crash if a spread and other things" 278 447 ); 279 448 280 449 assert_eq!( 281 - to_html_with_options("<a {} />", &swc).err().unwrap().to_string(), 282 - "1:9: Unexpected prop in spread (such as `{x}`): only a spread is supported (such as `{...x}`) (mdx:swc)", 283 - "should crash on an empty spread" 284 - ); 285 - 286 - assert_eq!( 287 - to_html_with_options("<a {a=b} />", &swc) 450 + to_html_with_options("<a {b=c} />", &swc) 288 451 .err() 289 452 .unwrap() 290 453 .to_string(), 291 454 "1:12: Could not parse expression with swc: assignment property is invalid syntax (mdx:swc)", 292 455 "should crash if not an identifier" 456 + ); 457 + 458 + // Note: `markdown-rs` has no `allowEmpty`. 459 + assert_eq!( 460 + to_html_with_options("<a {} />", &swc).err().unwrap().to_string(), 461 + "1:9: Unexpected prop in spread (such as `{x}`): only a spread is supported (such as `{...x}`) (mdx:swc)", 462 + "should crash on an empty spread" 293 463 ); 294 464 295 465 assert_eq!(
+55 -5
tests/mdx_expression_text.rs
··· 1 1 mod test_utils; 2 2 use markdown::{ 3 - mdast::{MdxTextExpression, Node, Paragraph, Root, Text}, 3 + mdast::{Blockquote, MdxTextExpression, Node, Paragraph, Root, Text}, 4 4 message, to_html_with_options, to_mdast, 5 5 unist::Position, 6 6 Constructs, Options, ParseOptions, ··· 8 8 use pretty_assertions::assert_eq; 9 9 use test_utils::swc::{parse_esm, parse_expression}; 10 10 11 + /// Note: these tests are also in `micromark/micromark-extension-mdx-expression` 12 + /// at `tests/index.js`. 11 13 #[test] 12 - fn mdx_expression_text_gnostic_core() -> Result<(), message::Message> { 14 + fn mdx_expression() -> Result<(), message::Message> { 13 15 let swc = Options { 14 16 parse: ParseOptions { 15 17 constructs: Constructs::mdx(), ··· 59 61 assert_eq!( 60 62 to_html_with_options("a {/*b*/c} d", &swc)?, 61 63 "<p>a d</p>", 62 - "should support a multiline comment (4)" 64 + "should support a multiline comment (5)" 63 65 ); 64 66 65 67 assert_eq!( ··· 87 89 ); 88 90 89 91 assert_eq!( 90 - to_html_with_options("a {// b\nd} d", &swc)?, 92 + to_html_with_options("a {// b\nc} d", &swc)?, 91 93 "<p>a d</p>", 92 94 "should support a line comment followed by a line ending and an expression" 93 95 ); ··· 101 103 assert_eq!( 102 104 to_html_with_options("a {/*b*/ // c\n} d", &swc)?, 103 105 "<p>a d</p>", 104 - "should support comments (1)" 106 + "should support comments" 105 107 ); 106 108 107 109 assert_eq!( ··· 158 160 Ok(()) 159 161 } 160 162 163 + /// Note: these tests are also in `micromark/micromark-extension-mdx-expression` 164 + /// at `tests/index.js`. 161 165 #[test] 162 166 fn mdx_expression_text_agnostic() -> Result<(), message::Message> { 163 167 let mdx = Options { ··· 241 245 Ok(()) 242 246 } 243 247 248 + /// Note: these tests are also in `micromark/micromark-extension-mdx-expression` 249 + /// at `tests/index.js`. 
244 250 #[test] 245 251 fn mdx_expression_text_gnostic() -> Result<(), message::Message> { 246 252 let swc = Options { ··· 320 326 to_html_with_options("a { /* } */ } b", &swc)?, 321 327 "<p>a b</p>", 322 328 "should support an unbalanced closing brace (if JS permits)" 329 + ); 330 + 331 + assert_eq!( 332 + to_mdast( 333 + "> alpha {`\n> bravo\n> charlie\n> delta\n> echo\n> `} foxtrot.", 334 + &swc.parse 335 + )?, 336 + Node::Root(Root { 337 + children: vec![Node::Blockquote(Blockquote { 338 + children: vec![Node::Paragraph(Paragraph { 339 + children: vec![ 340 + Node::Text(Text { 341 + value: "alpha ".into(), 342 + position: Some(Position::new(1, 3, 2, 1, 9, 8)), 343 + }), 344 + Node::MdxTextExpression(MdxTextExpression { 345 + value: "`\nbravo\ncharlie\ndelta\n echo\n`".into(), 346 + position: Some(Position::new(1, 9, 8, 6, 5, 54)), 347 + stops: vec![ 348 + (0, 9), 349 + (1, 10), 350 + (2, 13), 351 + (7, 18), 352 + (8, 22), 353 + (15, 29), 354 + (16, 34), 355 + (21, 39), 356 + (22, 44), 357 + (27, 49), 358 + (28, 52) 359 + ] 360 + }), 361 + Node::Text(Text { 362 + value: " foxtrot.".into(), 363 + position: Some(Position::new(6, 5, 54, 6, 14, 63)), 364 + }), 365 + ], 366 + position: Some(Position::new(1, 3, 2, 6, 14, 63)) 367 + })], 368 + position: Some(Position::new(1, 1, 0, 6, 14, 63)) 369 + })], 370 + position: Some(Position::new(1, 1, 0, 6, 14, 63)) 371 + }), 372 + "should keep the correct number of spaces in a blockquote (text)" 323 373 ); 324 374 325 375 Ok(())