// Markdown parser fork with extended syntax for personal use.
//! Frontmatter occurs at the start of the document.
//!
//! ## Grammar
//!
//! Frontmatter forms with the following BNF
//! (<small>see [construct][crate::construct] for character groups</small>):
//!
//! ```bnf
//! frontmatter ::= fence_open *( eol *byte ) eol fence_close
//! fence_open ::= sequence *space_or_tab
//! ; Restriction: markers in `sequence` must match markers in opening sequence.
//! fence_close ::= sequence *space_or_tab
//! sequence ::= 3'+' | 3'-'
//! ```
//!
//! Frontmatter can only occur once.
//! It cannot occur in a container.
//! It must have a closing fence.
//! Like flow constructs, it must be followed by an eol (line ending) or
//! eof (end of file).
//!
//! ## Extension
//!
//! > 👉 **Note**: frontmatter is not part of `CommonMark`, so frontmatter is
//! > not enabled by default.
//! > You need to enable it manually.
//! > See [`Constructs`][constructs] for more info.
//!
//! As there is no spec for frontmatter in markdown, this extension follows how
//! YAML frontmatter works on `github.com`.
//! It also parses TOML frontmatter, just like YAML except that it uses a `+`.
//!
//! ## Recommendation
//!
//! When authoring markdown with frontmatter, it’s recommended to use YAML
//! frontmatter if possible.
//! While YAML has some warts, it works in the most places, so using it
//! guarantees the highest chance of portability.
//!
//! In certain ecosystems, other flavors are widely used.
//! For example, in the Rust ecosystem, TOML is often used.
//! In such cases, using TOML is an okay choice.
//!
//! ## Tokens
//!
//! * [`Frontmatter`][Name::Frontmatter]
//! * [`FrontmatterFence`][Name::FrontmatterFence]
//! * [`FrontmatterSequence`][Name::FrontmatterSequence]
//! * [`FrontmatterChunk`][Name::FrontmatterChunk]
//! * [`LineEnding`][Name::LineEnding]
//! * [`SpaceOrTab`][Name::SpaceOrTab]
//!
//! ## References
//!
* [`micromark-extension-frontmatter`](https://github.com/micromark/micromark-extension-frontmatter) 56//! 57//! [constructs]: crate::Constructs 58 59use crate::construct::partial_space_or_tab::space_or_tab; 60use crate::event::Name; 61use crate::state::{Name as StateName, State}; 62use crate::tokenizer::Tokenizer; 63use crate::util::constant::FRONTMATTER_SEQUENCE_SIZE; 64 65/// Start of frontmatter. 66/// 67/// ```markdown 68/// > | --- 69/// ^ 70/// | title: "Venus" 71/// | --- 72/// ``` 73pub fn start(tokenizer: &mut Tokenizer) -> State { 74 // Indent not allowed. 75 if tokenizer.parse_state.options.constructs.frontmatter 76 && matches!(tokenizer.current, Some(b'+' | b'-')) 77 { 78 tokenizer.tokenize_state.marker = tokenizer.current.unwrap(); 79 tokenizer.enter(Name::Frontmatter); 80 tokenizer.enter(Name::FrontmatterFence); 81 tokenizer.enter(Name::FrontmatterSequence); 82 State::Retry(StateName::FrontmatterOpenSequence) 83 } else { 84 State::Nok 85 } 86} 87 88/// In open sequence. 89/// 90/// ```markdown 91/// > | --- 92/// ^ 93/// | title: "Venus" 94/// | --- 95/// ``` 96pub fn open_sequence(tokenizer: &mut Tokenizer) -> State { 97 if tokenizer.current == Some(tokenizer.tokenize_state.marker) { 98 tokenizer.tokenize_state.size += 1; 99 tokenizer.consume(); 100 State::Next(StateName::FrontmatterOpenSequence) 101 } else if tokenizer.tokenize_state.size == FRONTMATTER_SEQUENCE_SIZE { 102 tokenizer.tokenize_state.size = 0; 103 tokenizer.exit(Name::FrontmatterSequence); 104 105 if matches!(tokenizer.current, Some(b'\t' | b' ')) { 106 tokenizer.attempt(State::Next(StateName::FrontmatterOpenAfter), State::Nok); 107 State::Retry(space_or_tab(tokenizer)) 108 } else { 109 State::Retry(StateName::FrontmatterOpenAfter) 110 } 111 } else { 112 tokenizer.tokenize_state.marker = 0; 113 tokenizer.tokenize_state.size = 0; 114 State::Nok 115 } 116} 117 118/// After open sequence. 
119/// 120/// ```markdown 121/// > | --- 122/// ^ 123/// | title: "Venus" 124/// | --- 125/// ``` 126pub fn open_after(tokenizer: &mut Tokenizer) -> State { 127 if let Some(b'\n') = tokenizer.current { 128 tokenizer.exit(Name::FrontmatterFence); 129 tokenizer.enter(Name::LineEnding); 130 tokenizer.consume(); 131 tokenizer.exit(Name::LineEnding); 132 tokenizer.attempt( 133 State::Next(StateName::FrontmatterAfter), 134 State::Next(StateName::FrontmatterContentStart), 135 ); 136 State::Next(StateName::FrontmatterCloseStart) 137 } else { 138 tokenizer.tokenize_state.marker = 0; 139 State::Nok 140 } 141} 142 143/// Start of close sequence. 144/// 145/// ```markdown 146/// | --- 147/// | title: "Venus" 148/// > | --- 149/// ^ 150/// ``` 151pub fn close_start(tokenizer: &mut Tokenizer) -> State { 152 if tokenizer.current == Some(tokenizer.tokenize_state.marker) { 153 tokenizer.enter(Name::FrontmatterFence); 154 tokenizer.enter(Name::FrontmatterSequence); 155 State::Retry(StateName::FrontmatterCloseSequence) 156 } else { 157 State::Nok 158 } 159} 160 161/// In close sequence. 162/// 163/// ```markdown 164/// | --- 165/// | title: "Venus" 166/// > | --- 167/// ^ 168/// ``` 169pub fn close_sequence(tokenizer: &mut Tokenizer) -> State { 170 if tokenizer.current == Some(tokenizer.tokenize_state.marker) { 171 tokenizer.tokenize_state.size += 1; 172 tokenizer.consume(); 173 State::Next(StateName::FrontmatterCloseSequence) 174 } else if tokenizer.tokenize_state.size == FRONTMATTER_SEQUENCE_SIZE { 175 tokenizer.tokenize_state.size = 0; 176 tokenizer.exit(Name::FrontmatterSequence); 177 178 if matches!(tokenizer.current, Some(b'\t' | b' ')) { 179 tokenizer.attempt(State::Next(StateName::FrontmatterCloseAfter), State::Nok); 180 State::Retry(space_or_tab(tokenizer)) 181 } else { 182 State::Retry(StateName::FrontmatterCloseAfter) 183 } 184 } else { 185 tokenizer.tokenize_state.size = 0; 186 State::Nok 187 } 188} 189 190/// After close sequence. 
191/// 192/// ```markdown 193/// | --- 194/// | title: "Venus" 195/// > | --- 196/// ^ 197/// ``` 198pub fn close_after(tokenizer: &mut Tokenizer) -> State { 199 match tokenizer.current { 200 None | Some(b'\n') => { 201 tokenizer.exit(Name::FrontmatterFence); 202 State::Ok 203 } 204 _ => State::Nok, 205 } 206} 207 208/// Start of content chunk. 209/// 210/// ```markdown 211/// | --- 212/// > | title: "Venus" 213/// ^ 214/// | --- 215/// ``` 216pub fn content_start(tokenizer: &mut Tokenizer) -> State { 217 match tokenizer.current { 218 None | Some(b'\n') => State::Retry(StateName::FrontmatterContentEnd), 219 Some(_) => { 220 tokenizer.enter(Name::FrontmatterChunk); 221 State::Retry(StateName::FrontmatterContentInside) 222 } 223 } 224} 225 226/// In content chunk. 227/// 228/// ```markdown 229/// | --- 230/// > | title: "Venus" 231/// ^ 232/// | --- 233/// ``` 234pub fn content_inside(tokenizer: &mut Tokenizer) -> State { 235 match tokenizer.current { 236 None | Some(b'\n') => { 237 tokenizer.exit(Name::FrontmatterChunk); 238 State::Retry(StateName::FrontmatterContentEnd) 239 } 240 Some(_) => { 241 tokenizer.consume(); 242 State::Next(StateName::FrontmatterContentInside) 243 } 244 } 245} 246 247/// End of content chunk. 248/// 249/// ```markdown 250/// | --- 251/// > | title: "Venus" 252/// ^ 253/// | --- 254/// ``` 255pub fn content_end(tokenizer: &mut Tokenizer) -> State { 256 match tokenizer.current { 257 None => { 258 tokenizer.tokenize_state.marker = 0; 259 State::Nok 260 } 261 Some(b'\n') => { 262 tokenizer.enter(Name::LineEnding); 263 tokenizer.consume(); 264 tokenizer.exit(Name::LineEnding); 265 tokenizer.attempt( 266 State::Next(StateName::FrontmatterAfter), 267 State::Next(StateName::FrontmatterContentStart), 268 ); 269 State::Next(StateName::FrontmatterCloseStart) 270 } 271 Some(_) => unreachable!("expected eof/eol"), 272 } 273} 274 275/// After frontmatter. 
276/// 277/// ```markdown 278/// | --- 279/// | title: "Venus" 280/// > | --- 281/// ^ 282/// ``` 283pub fn after(tokenizer: &mut Tokenizer) -> State { 284 debug_assert!( 285 matches!(tokenizer.current, None | Some(b'\n')), 286 "expected eol/eof after closing fence" 287 ); 288 tokenizer.exit(Name::Frontmatter); 289 State::Ok 290}