Markdown parser fork with extended syntax for personal use.
at hack 242 lines 7.8 kB view raw
1//! Block quotes occur in the [document][] content type. 2//! 3//! ## Grammar 4//! 5//! Block quotes form with the following BNF 6//! (<small>see [construct][crate::construct] for character groups</small>): 7//! 8//! ```bnf 9//! block_quote_start ::= '>' [ space_or_tab ] 10//! block_quote_cont ::= '>' [ space_or_tab ] 11//! ``` 12//! 13//! Further lines that are not prefixed with `block_quote_cont` cause the block 14//! quote to be exited, except when those lines are lazy continuation. 15//! Like so many things in markdown, block quotes too are complex. 16//! See [*§ Phase 1: block structure* in `CommonMark`][commonmark-block] for 17//! more on parsing details. 18//! 19//! As block quote is a container, it takes several bytes from the start of the 20//! line, while the rest of the line includes more containers or flow. 21//! 22//! ## HTML 23//! 24//! Block quote relates to the `<blockquote>` element in HTML. 25//! See [*§ 4.4.4 The `blockquote` element*][html-blockquote] in the HTML spec 26//! for more info. 27//! 28//! ## Recommendation 29//! 30//! Always use a single space after a block quote marker (`>`). 31//! Never use lazy continuation. 32//! 33//! ## Tokens 34//! 35//! * [`BlockQuote`][Name::BlockQuote] 36//! * [`BlockQuoteMarker`][Name::BlockQuoteMarker] 37//! * [`BlockQuotePrefix`][Name::BlockQuotePrefix] 38//! * [`SpaceOrTab`][Name::SpaceOrTab] 39//! 40//! ## References 41//! 42//! * [`block-quote.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/block-quote.js) 43//! * [*§ 5.1 Block quotes* in `CommonMark`](https://spec.commonmark.org/0.31/#block-quotes) 44//! 45//! [document]: crate::construct::document 46//! [html-blockquote]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-blockquote-element 47//! [commonmark-block]: https://spec.commonmark.org/0.31/#phase-1-block-structure 48 49use crate::construct::partial_space_or_tab::space_or_tab_min_max; 50use crate::event::Name; 51use crate::state::{Name as StateName, State}; 52use crate::tokenizer::Tokenizer; 53use crate::util::constant::TAB_SIZE; 54 55/// Start of block quote. 56/// 57/// ```markdown 58/// > | > a 59/// ^ 60/// ``` 61pub fn start(tokenizer: &mut Tokenizer) -> State { 62 if tokenizer.parse_state.options.constructs.block_quote { 63 tokenizer.enter(Name::BlockQuote); 64 65 State::Retry(StateName::BlockQuoteContStart) 66 } else { 67 State::Nok 68 } 69} 70 71/// Start of block quote continuation. 72/// 73/// Also used to parse the first block quote opening. 74/// 75/// ```markdown 76/// | > a 77/// > | > b 78/// ^ 79/// ``` 80pub fn cont_start(tokenizer: &mut Tokenizer) -> State { 81 if matches!(tokenizer.current, Some(b'\t' | b' ')) { 82 tokenizer.attempt(State::Next(StateName::BlockQuoteContBefore), State::Nok); 83 State::Retry(space_or_tab_min_max( 84 tokenizer, 85 1, 86 if tokenizer.parse_state.options.constructs.code_indented { 87 TAB_SIZE - 1 88 } else { 89 usize::MAX 90 }, 91 )) 92 } else { 93 State::Retry(StateName::BlockQuoteContBefore) 94 } 95} 96 97/// At `>`, after optional whitespace. 98/// 99/// Also used to parse the first block quote opening. 100/// 101/// ```markdown 102/// | > a 103/// > | > b 104/// ^ 105/// ``` 106pub fn cont_before(tokenizer: &mut Tokenizer) -> State { 107 match tokenizer.current { 108 Some(b'>') => { 109 tokenizer.enter(Name::BlockQuotePrefix); 110 tokenizer.enter(Name::BlockQuoteMarker); 111 tokenizer.consume(); 112 tokenizer.exit(Name::BlockQuoteMarker); 113 114 // If we are looking for obsidian block quote metadata... 115 if tokenizer.parse_state.options.constructs.obs_block_quote { 116 // Clean up our flags 117 tokenizer.tokenize_state.seen = false; 118 119 // Start looking for the metadata! 120 tokenizer.attempt( 121 State::Next(StateName::BlockQuoteContBeforeNoCallout), 122 State::Next(StateName::BlockQuoteContAfter), 123 ); 124 return State::Next(StateName::ObsidianBlockQuoteCalloutStart); 125 } 126 127 State::Next(StateName::BlockQuoteContAfter) 128 } 129 _ => State::Nok, 130 } 131} 132 133// Just the original version of the previous function, without looking for 134// Obsidian callout blocks... 135pub fn cont_before_no_callout(tokenizer: &mut Tokenizer) -> State { 136 match tokenizer.current { 137 Some(b'>') => { 138 tokenizer.enter(Name::BlockQuotePrefix); 139 tokenizer.enter(Name::BlockQuoteMarker); 140 tokenizer.consume(); 141 tokenizer.exit(Name::BlockQuoteMarker); 142 143 State::Next(StateName::BlockQuoteContAfter) 144 } 145 _ => State::Nok, 146 } 147} 148 149/// After `>`, before optional whitespace. 150/// 151/// ```markdown 152/// > | > a 153/// ^ 154/// > | >b 155/// ^ 156/// ``` 157pub fn cont_after(tokenizer: &mut Tokenizer) -> State { 158 if let Some(b'\t' | b' ') = tokenizer.current { 159 tokenizer.enter(Name::SpaceOrTab); 160 tokenizer.consume(); 161 tokenizer.exit(Name::SpaceOrTab); 162 } 163 164 tokenizer.exit(Name::BlockQuotePrefix); 165 State::Ok 166} 167 168pub fn obs_callout_start(tokenizer: &mut Tokenizer) -> State { 169 match tokenizer.current { 170 // Eat all whitespace before a callout 171 Some(b' ' | b'\t') => { 172 tokenizer.attempt( 173 State::Next(StateName::ObsidianBlockQuoteCalloutStart), 174 // State::Next(StateName::BlockQuoteContAfter), 175 State::Nok, 176 ); 177 State::Retry(space_or_tab_min_max( 178 tokenizer, 179 1, 180 if tokenizer.parse_state.options.constructs.code_indented { 181 TAB_SIZE - 1 182 } else { 183 usize::MAX 184 }, 185 )) 186 } 187 Some(b'[') => { 188 // We reuse state to mark that we've seen a possible callout start 189 tokenizer.tokenize_state.seen = true; 190 tokenizer.consume(); 191 State::Next(StateName::ObsidianBlockQuoteCalloutStart) 192 } 193 Some(b'!') if tokenizer.tokenize_state.seen => { 194 // We're inside a callout now 195 tokenizer.enter(Name::ObsidianBlockQuoteCallout); 196 tokenizer.enter(Name::ObsidianBlockQuoteCalloutType); 197 tokenizer.consume(); 198 199 // We reuse this state variable until we've found a closing ] 200 tokenizer.tokenize_state.seen = true; 201 State::Next(StateName::ObsidianBlockQuoteCalloutInner) 202 } 203 _ => State::Nok, 204 } 205} 206 207/// ```markdown 208/// > [!blahblah] 209/// ^ 210/// ``` 211pub fn obs_callout_inner(tokenizer: &mut Tokenizer) -> State { 212 match tokenizer.current { 213 Some(b']') if tokenizer.tokenize_state.seen => { 214 // We've seen the end of the callout declaration 215 tokenizer.tokenize_state.seen = false; 216 217 // TODO: Optionally check for - character 218 tokenizer.consume(); 219 tokenizer.exit(Name::ObsidianBlockQuoteCalloutType); 220 tokenizer.enter(Name::ObsidianText); 221 State::Next(StateName::ObsidianBlockQuoteCalloutInner) 222 } 223 Some(b'\n') => { 224 // Resume normal flow! 225 tokenizer.consume(); 226 tokenizer.exit(Name::ObsidianText); 227 tokenizer.exit(Name::ObsidianBlockQuoteCallout); 228 tokenizer.tokenize_state.seen = false; 229 // State::Next(StateName::BlockQuoteContBefore) 230 State::Ok 231 } 232 _ if tokenizer.tokenize_state.seen => { 233 tokenizer.consume(); 234 State::Next(StateName::ObsidianBlockQuoteCalloutInner) 235 } 236 _ => { 237 // We just save whatever this is as text 238 tokenizer.consume(); 239 State::Next(StateName::ObsidianBlockQuoteCalloutInner) 240 } 241 } 242}