// Markdown parser fork with extended syntax for personal use.
1//! Frontmatter occurs at the start of the document.
2//!
3//! ## Grammar
4//!
5//! Frontmatter forms with the following BNF
6//! (<small>see [construct][crate::construct] for character groups</small>):
7//!
8//! ```bnf
9//! frontmatter ::= fence_open *( eol *byte ) eol fence_close
10//! fence_open ::= sequence *space_or_tab
11//! ; Restriction: markers in `sequence` must match markers in opening sequence.
12//! fence_close ::= sequence *space_or_tab
13//! sequence ::= 3'+' | 3'-'
14//! ```
15//!
16//! Frontmatter can only occur once.
17//! It cannot occur in a container.
18//! It must have a closing fence.
19//! Like flow constructs, it must be followed by an eol (line ending) or
20//! eof (end of file).
21//!
22//! ## Extension
23//!
24//! > 👉 **Note**: frontmatter is not part of `CommonMark`, so frontmatter is
25//! > not enabled by default.
26//! > You need to enable it manually.
27//! > See [`Constructs`][constructs] for more info.
28//!
29//! As there is no spec for frontmatter in markdown, this extension follows how
30//! YAML frontmatter works on `github.com`.
31//! It also parses TOML frontmatter, just like YAML except that it uses a `+`.
32//!
33//! ## Recommendation
34//!
35//! When authoring markdown with frontmatter, it’s recommended to use YAML
36//! frontmatter if possible.
37//! While YAML has some warts, it works in the most places, so using it
38//! guarantees the highest chance of portability.
39//!
40//! In certain ecosystems, other flavors are widely used.
41//! For example, in the Rust ecosystem, TOML is often used.
42//! In such cases, using TOML is an okay choice.
43//!
44//! ## Tokens
45//!
46//! * [`Frontmatter`][Name::Frontmatter]
47//! * [`FrontmatterFence`][Name::FrontmatterFence]
48//! * [`FrontmatterSequence`][Name::FrontmatterSequence]
49//! * [`FrontmatterChunk`][Name::FrontmatterChunk]
50//! * [`LineEnding`][Name::LineEnding]
51//! * [`SpaceOrTab`][Name::SpaceOrTab]
52//!
53//! ## References
54//!
55//! * [`micromark-extension-frontmatter`](https://github.com/micromark/micromark-extension-frontmatter)
56//!
57//! [constructs]: crate::Constructs
58
59use crate::construct::partial_space_or_tab::space_or_tab;
60use crate::event::Name;
61use crate::state::{Name as StateName, State};
62use crate::tokenizer::Tokenizer;
63use crate::util::constant::FRONTMATTER_SEQUENCE_SIZE;
64
65/// Start of frontmatter.
66///
67/// ```markdown
68/// > | ---
69/// ^
70/// | title: "Venus"
71/// | ---
72/// ```
73pub fn start(tokenizer: &mut Tokenizer) -> State {
74 // Indent not allowed.
75 if tokenizer.parse_state.options.constructs.frontmatter
76 && matches!(tokenizer.current, Some(b'+' | b'-'))
77 {
78 tokenizer.tokenize_state.marker = tokenizer.current.unwrap();
79 tokenizer.enter(Name::Frontmatter);
80 tokenizer.enter(Name::FrontmatterFence);
81 tokenizer.enter(Name::FrontmatterSequence);
82 State::Retry(StateName::FrontmatterOpenSequence)
83 } else {
84 State::Nok
85 }
86}
87
88/// In open sequence.
89///
90/// ```markdown
91/// > | ---
92/// ^
93/// | title: "Venus"
94/// | ---
95/// ```
96pub fn open_sequence(tokenizer: &mut Tokenizer) -> State {
97 if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
98 tokenizer.tokenize_state.size += 1;
99 tokenizer.consume();
100 State::Next(StateName::FrontmatterOpenSequence)
101 } else if tokenizer.tokenize_state.size == FRONTMATTER_SEQUENCE_SIZE {
102 tokenizer.tokenize_state.size = 0;
103 tokenizer.exit(Name::FrontmatterSequence);
104
105 if matches!(tokenizer.current, Some(b'\t' | b' ')) {
106 tokenizer.attempt(State::Next(StateName::FrontmatterOpenAfter), State::Nok);
107 State::Retry(space_or_tab(tokenizer))
108 } else {
109 State::Retry(StateName::FrontmatterOpenAfter)
110 }
111 } else {
112 tokenizer.tokenize_state.marker = 0;
113 tokenizer.tokenize_state.size = 0;
114 State::Nok
115 }
116}
117
118/// After open sequence.
119///
120/// ```markdown
121/// > | ---
122/// ^
123/// | title: "Venus"
124/// | ---
125/// ```
126pub fn open_after(tokenizer: &mut Tokenizer) -> State {
127 if let Some(b'\n') = tokenizer.current {
128 tokenizer.exit(Name::FrontmatterFence);
129 tokenizer.enter(Name::LineEnding);
130 tokenizer.consume();
131 tokenizer.exit(Name::LineEnding);
132 tokenizer.attempt(
133 State::Next(StateName::FrontmatterAfter),
134 State::Next(StateName::FrontmatterContentStart),
135 );
136 State::Next(StateName::FrontmatterCloseStart)
137 } else {
138 tokenizer.tokenize_state.marker = 0;
139 State::Nok
140 }
141}
142
143/// Start of close sequence.
144///
145/// ```markdown
146/// | ---
147/// | title: "Venus"
148/// > | ---
149/// ^
150/// ```
151pub fn close_start(tokenizer: &mut Tokenizer) -> State {
152 if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
153 tokenizer.enter(Name::FrontmatterFence);
154 tokenizer.enter(Name::FrontmatterSequence);
155 State::Retry(StateName::FrontmatterCloseSequence)
156 } else {
157 State::Nok
158 }
159}
160
/// In close sequence.
///
/// ```markdown
///   | ---
///   | title: "Venus"
/// > | ---
///     ^
/// ```
pub fn close_sequence(tokenizer: &mut Tokenizer) -> State {
    // Same marker as the opening fence: keep counting.
    if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
        tokenizer.tokenize_state.size += 1;
        tokenizer.consume();
        State::Next(StateName::FrontmatterCloseSequence)
    } else if tokenizer.tokenize_state.size == FRONTMATTER_SEQUENCE_SIZE {
        // Exactly the required number of markers: a valid closing sequence.
        tokenizer.tokenize_state.size = 0;
        tokenizer.exit(Name::FrontmatterSequence);

        if matches!(tokenizer.current, Some(b'\t' | b' ')) {
            // Trailing whitespace is allowed after the closing sequence.
            tokenizer.attempt(State::Next(StateName::FrontmatterCloseAfter), State::Nok);
            State::Retry(space_or_tab(tokenizer))
        } else {
            State::Retry(StateName::FrontmatterCloseAfter)
        }
    } else {
        // Wrong length: this line is content, not a closing fence.
        // Unlike `open_sequence`, `marker` is deliberately NOT reset here:
        // the outer attempt falls back to the content states, and a later
        // `close_start` still needs the marker to match against.
        tokenizer.tokenize_state.size = 0;
        State::Nok
    }
}
189
190/// After close sequence.
191///
192/// ```markdown
193/// | ---
194/// | title: "Venus"
195/// > | ---
196/// ^
197/// ```
198pub fn close_after(tokenizer: &mut Tokenizer) -> State {
199 match tokenizer.current {
200 None | Some(b'\n') => {
201 tokenizer.exit(Name::FrontmatterFence);
202 State::Ok
203 }
204 _ => State::Nok,
205 }
206}
207
208/// Start of content chunk.
209///
210/// ```markdown
211/// | ---
212/// > | title: "Venus"
213/// ^
214/// | ---
215/// ```
216pub fn content_start(tokenizer: &mut Tokenizer) -> State {
217 match tokenizer.current {
218 None | Some(b'\n') => State::Retry(StateName::FrontmatterContentEnd),
219 Some(_) => {
220 tokenizer.enter(Name::FrontmatterChunk);
221 State::Retry(StateName::FrontmatterContentInside)
222 }
223 }
224}
225
226/// In content chunk.
227///
228/// ```markdown
229/// | ---
230/// > | title: "Venus"
231/// ^
232/// | ---
233/// ```
234pub fn content_inside(tokenizer: &mut Tokenizer) -> State {
235 match tokenizer.current {
236 None | Some(b'\n') => {
237 tokenizer.exit(Name::FrontmatterChunk);
238 State::Retry(StateName::FrontmatterContentEnd)
239 }
240 Some(_) => {
241 tokenizer.consume();
242 State::Next(StateName::FrontmatterContentInside)
243 }
244 }
245}
246
/// End of content chunk.
///
/// ```markdown
///   | ---
/// > | title: "Venus"
///                    ^
///   | ---
/// ```
pub fn content_end(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        None => {
            // Eof inside frontmatter: there is no closing fence, so the
            // whole thing turns out not to be frontmatter after all.
            tokenizer.tokenize_state.marker = 0;
            State::Nok
        }
        Some(b'\n') => {
            tokenizer.enter(Name::LineEnding);
            tokenizer.consume();
            tokenizer.exit(Name::LineEnding);
            // Try the next line as a closing fence first; if that attempt
            // fails, treat it as another content chunk.
            tokenizer.attempt(
                State::Next(StateName::FrontmatterAfter),
                State::Next(StateName::FrontmatterContentStart),
            );
            State::Next(StateName::FrontmatterCloseStart)
        }
        // `content_start`/`content_inside` only retry here at eof/eol.
        Some(_) => unreachable!("expected eof/eol"),
    }
}
274
275/// After frontmatter.
276///
277/// ```markdown
278/// | ---
279/// | title: "Venus"
280/// > | ---
281/// ^
282/// ```
283pub fn after(tokenizer: &mut Tokenizer) -> State {
284 debug_assert!(
285 matches!(tokenizer.current, None | Some(b'\n')),
286 "expected eol/eof after closing fence"
287 );
288 tokenizer.exit(Name::Frontmatter);
289 State::Ok
290}