Markdown parser fork with extended syntax for personal use.
at hack 281 lines 7.5 kB view raw
1//! The flow content type. 2//! 3//! **Flow** represents the sections, such as headings and code, which are 4//! parsed per line. 5//! An example is HTML, which has a certain starting condition (such as 6//! `<script>` on its own line), then continues for a while, until an end 7//! condition is found (such as `</style>`). 8//! If that line with an end condition is never found, that flow goes until 9//! the end. 10//! 11//! The constructs found in flow are: 12//! 13//! * [Blank line][crate::construct::blank_line] 14//! * [Code (indented)][crate::construct::code_indented] 15//! * [Heading (atx)][crate::construct::heading_atx] 16//! * [Heading (setext)][crate::construct::heading_setext] 17//! * [HTML (flow)][crate::construct::html_flow] 18//! * [MDX esm][crate::construct::mdx_esm] 19//! * [MDX expression (flow)][crate::construct::mdx_expression_flow] 20//! * [MDX JSX (flow)][crate::construct::mdx_jsx_flow] 21//! * [Raw (flow)][crate::construct::raw_flow] (code (fenced), math (flow)) 22//! * [Thematic break][crate::construct::thematic_break] 23 24use crate::event::Name; 25use crate::state::{Name as StateName, State}; 26use crate::tokenizer::Tokenizer; 27 28/// Start of flow. 29// 30/// ```markdown 31/// > | ## alpha 32/// ^ 33/// > | bravo 34/// ^ 35/// > | *** 36/// ^ 37/// ``` 38pub fn start(tokenizer: &mut Tokenizer) -> State { 39 match tokenizer.current { 40 Some(b'#') => { 41 tokenizer.attempt( 42 State::Next(StateName::FlowAfter), 43 State::Next(StateName::FlowBeforeContent), 44 ); 45 State::Retry(StateName::HeadingAtxStart) 46 } 47 Some(b'$' | b'`' | b'~') => { 48 tokenizer.attempt( 49 State::Next(StateName::FlowAfter), 50 State::Next(StateName::FlowBeforeContent), 51 ); 52 State::Retry(StateName::RawFlowStart) 53 } 54 // Note: `-` is also used in setext heading underline so it’s not 55 // included here. 56 Some(b'*' | b'_') => { 57 tokenizer.attempt( 58 State::Next(StateName::FlowAfter), 59 State::Next(StateName::FlowBeforeContent), 60 ); 61 State::Retry(StateName::ThematicBreakStart) 62 } 63 Some(b'<') => { 64 tokenizer.attempt( 65 State::Next(StateName::FlowAfter), 66 State::Next(StateName::FlowBeforeMdxJsx), 67 ); 68 State::Retry(StateName::HtmlFlowStart) 69 } 70 Some(b'e' | b'i') => { 71 tokenizer.attempt( 72 State::Next(StateName::FlowAfter), 73 State::Next(StateName::FlowBeforeContent), 74 ); 75 State::Retry(StateName::MdxEsmStart) 76 } 77 Some(b'{') => { 78 tokenizer.attempt( 79 State::Next(StateName::FlowAfter), 80 State::Next(StateName::FlowBeforeContent), 81 ); 82 State::Retry(StateName::MdxExpressionFlowStart) 83 } 84 // Actual parsing: blank line? Indented code? Indented anything? 85 // Tables, setext heading underlines, definitions, and Contents are 86 // particularly weird. 87 _ => State::Retry(StateName::FlowBlankLineBefore), 88 } 89} 90 91/// At blank line. 92/// 93/// ```markdown 94/// > | ␠␠␊ 95/// ^ 96/// ``` 97pub fn blank_line_before(tokenizer: &mut Tokenizer) -> State { 98 tokenizer.attempt( 99 State::Next(StateName::FlowBlankLineAfter), 100 State::Next(StateName::FlowBeforeCodeIndented), 101 ); 102 State::Retry(StateName::BlankLineStart) 103} 104 105/// At code (indented). 106/// 107/// ```markdown 108/// > | ␠␠␠␠a 109/// ^ 110/// ``` 111pub fn before_code_indented(tokenizer: &mut Tokenizer) -> State { 112 tokenizer.attempt( 113 State::Next(StateName::FlowAfter), 114 State::Next(StateName::FlowBeforeRaw), 115 ); 116 State::Retry(StateName::CodeIndentedStart) 117} 118 119/// At raw. 120/// 121/// ````markdown 122/// > | ``` 123/// ^ 124/// ```` 125pub fn before_raw(tokenizer: &mut Tokenizer) -> State { 126 tokenizer.attempt( 127 State::Next(StateName::FlowAfter), 128 State::Next(StateName::FlowBeforeHtml), 129 ); 130 State::Retry(StateName::RawFlowStart) 131} 132 133/// At html (flow). 134/// 135/// ```markdown 136/// > | <a> 137/// ^ 138/// ``` 139pub fn before_html(tokenizer: &mut Tokenizer) -> State { 140 tokenizer.attempt( 141 State::Next(StateName::FlowAfter), 142 State::Next(StateName::FlowBeforeMdxJsx), 143 ); 144 State::Retry(StateName::HtmlFlowStart) 145} 146 147/// At mdx jsx (flow). 148/// 149/// ```markdown 150/// > | <A /> 151/// ^ 152/// ``` 153pub fn before_mdx_jsx(tokenizer: &mut Tokenizer) -> State { 154 tokenizer.attempt( 155 State::Next(StateName::FlowAfter), 156 State::Next(StateName::FlowBeforeHeadingAtx), 157 ); 158 State::Retry(StateName::MdxJsxFlowStart) 159} 160 161/// At heading (atx). 162/// 163/// ```markdown 164/// > | # a 165/// ^ 166/// ``` 167pub fn before_heading_atx(tokenizer: &mut Tokenizer) -> State { 168 tokenizer.attempt( 169 State::Next(StateName::FlowAfter), 170 State::Next(StateName::FlowBeforeHeadingSetext), 171 ); 172 State::Retry(StateName::HeadingAtxStart) 173} 174 175/// At heading (setext). 176/// 177/// ```markdown 178/// | a 179/// > | = 180/// ^ 181/// ``` 182pub fn before_heading_setext(tokenizer: &mut Tokenizer) -> State { 183 tokenizer.attempt( 184 State::Next(StateName::FlowAfter), 185 State::Next(StateName::FlowBeforeThematicBreak), 186 ); 187 State::Retry(StateName::HeadingSetextStart) 188} 189 190/// At thematic break. 191/// 192/// ```markdown 193/// > | *** 194/// ^ 195/// ``` 196pub fn before_thematic_break(tokenizer: &mut Tokenizer) -> State { 197 tokenizer.attempt( 198 State::Next(StateName::FlowAfter), 199 State::Next(StateName::FlowBeforeMdxExpression), 200 ); 201 State::Retry(StateName::ThematicBreakStart) 202} 203 204/// At MDX expression (flow). 205/// 206/// ```markdown 207/// > | {Math.PI} 208/// ^ 209/// ``` 210pub fn before_mdx_expression(tokenizer: &mut Tokenizer) -> State { 211 tokenizer.attempt( 212 State::Next(StateName::FlowAfter), 213 State::Next(StateName::FlowBeforeGfmTable), 214 ); 215 State::Retry(StateName::MdxExpressionFlowStart) 216} 217 218/// At GFM table. 219/// 220/// ```markdown 221/// > | | a | 222/// ^ 223/// ``` 224pub fn before_gfm_table(tokenizer: &mut Tokenizer) -> State { 225 tokenizer.attempt( 226 State::Next(StateName::FlowAfter), 227 State::Next(StateName::FlowBeforeContent), 228 ); 229 State::Retry(StateName::GfmTableStart) 230} 231 232/// At content. 233/// 234/// ```markdown 235/// > | a 236/// ^ 237/// ``` 238pub fn before_content(tokenizer: &mut Tokenizer) -> State { 239 tokenizer.attempt(State::Next(StateName::FlowAfter), State::Nok); 240 State::Retry(StateName::ContentChunkStart) 241} 242 243/// After blank line. 244/// 245/// ```markdown 246/// > | ␠␠␊ 247/// ^ 248/// ``` 249pub fn blank_line_after(tokenizer: &mut Tokenizer) -> State { 250 match tokenizer.current { 251 None => State::Ok, 252 Some(b'\n') => { 253 tokenizer.enter(Name::BlankLineEnding); 254 tokenizer.consume(); 255 tokenizer.exit(Name::BlankLineEnding); 256 // Feel free to interrupt. 257 tokenizer.interrupt = false; 258 State::Next(StateName::FlowStart) 259 } 260 _ => unreachable!("expected eol/eof"), 261 } 262} 263 264/// After flow. 265/// 266/// ```markdown 267/// > | # a␊ 268/// ^ 269/// ``` 270pub fn after(tokenizer: &mut Tokenizer) -> State { 271 match tokenizer.current { 272 None => State::Ok, 273 Some(b'\n') => { 274 tokenizer.enter(Name::LineEnding); 275 tokenizer.consume(); 276 tokenizer.exit(Name::LineEnding); 277 State::Next(StateName::FlowStart) 278 } 279 _ => unreachable!("expected eol/eof"), 280 } 281}