Markdown parser fork with extended syntax for personal use.
1//! Block quotes occur in the [document][] content type.
2//!
3//! ## Grammar
4//!
5//! Block quotes form with the following BNF
6//! (<small>see [construct][crate::construct] for character groups</small>):
7//!
8//! ```bnf
9//! block_quote_start ::= '>' [ space_or_tab ]
10//! block_quote_cont ::= '>' [ space_or_tab ]
11//! ```
12//!
13//! Further lines that are not prefixed with `block_quote_cont` cause the block
14//! quote to be exited, except when those lines are lazy continuation.
15//! Like so many things in markdown, block quotes too are complex.
16//! See [*§ Phase 1: block structure* in `CommonMark`][commonmark-block] for
17//! more on parsing details.
18//!
19//! As block quote is a container, it takes several bytes from the start of the
20//! line, while the rest of the line includes more containers or flow.
21//!
22//! ## HTML
23//!
24//! Block quote relates to the `<blockquote>` element in HTML.
25//! See [*§ 4.4.4 The `blockquote` element*][html-blockquote] in the HTML spec
26//! for more info.
27//!
28//! ## Recommendation
29//!
30//! Always use a single space after a block quote marker (`>`).
31//! Never use lazy continuation.
32//!
33//! ## Tokens
34//!
35//! * [`BlockQuote`][Name::BlockQuote]
36//! * [`BlockQuoteMarker`][Name::BlockQuoteMarker]
37//! * [`BlockQuotePrefix`][Name::BlockQuotePrefix]
38//! * [`SpaceOrTab`][Name::SpaceOrTab]
39//!
40//! ## References
41//!
42//! * [`block-quote.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/block-quote.js)
43//! * [*§ 5.1 Block quotes* in `CommonMark`](https://spec.commonmark.org/0.31/#block-quotes)
44//!
45//! [document]: crate::construct::document
46//! [html-blockquote]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-blockquote-element
47//! [commonmark-block]: https://spec.commonmark.org/0.31/#phase-1-block-structure
48
49use crate::construct::partial_space_or_tab::space_or_tab_min_max;
50use crate::event::Name;
51use crate::state::{Name as StateName, State};
52use crate::tokenizer::Tokenizer;
53use crate::util::constant::TAB_SIZE;
54
55/// Start of block quote.
56///
57/// ```markdown
58/// > | > a
59/// ^
60/// ```
61pub fn start(tokenizer: &mut Tokenizer) -> State {
62 if tokenizer.parse_state.options.constructs.block_quote {
63 tokenizer.enter(Name::BlockQuote);
64
65 State::Retry(StateName::BlockQuoteContStart)
66 } else {
67 State::Nok
68 }
69}
70
71/// Start of block quote continuation.
72///
73/// Also used to parse the first block quote opening.
74///
75/// ```markdown
76/// | > a
77/// > | > b
78/// ^
79/// ```
80pub fn cont_start(tokenizer: &mut Tokenizer) -> State {
81 if matches!(tokenizer.current, Some(b'\t' | b' ')) {
82 tokenizer.attempt(State::Next(StateName::BlockQuoteContBefore), State::Nok);
83 State::Retry(space_or_tab_min_max(
84 tokenizer,
85 1,
86 if tokenizer.parse_state.options.constructs.code_indented {
87 TAB_SIZE - 1
88 } else {
89 usize::MAX
90 },
91 ))
92 } else {
93 State::Retry(StateName::BlockQuoteContBefore)
94 }
95}
96
97/// At `>`, after optional whitespace.
98///
99/// Also used to parse the first block quote opening.
100///
101/// ```markdown
102/// | > a
103/// > | > b
104/// ^
105/// ```
106pub fn cont_before(tokenizer: &mut Tokenizer) -> State {
107 match tokenizer.current {
108 Some(b'>') => {
109 tokenizer.enter(Name::BlockQuotePrefix);
110 tokenizer.enter(Name::BlockQuoteMarker);
111 tokenizer.consume();
112 tokenizer.exit(Name::BlockQuoteMarker);
113
114 // If we are looking for obsidian block quote metadata...
115 if tokenizer.parse_state.options.constructs.obs_block_quote {
116 // Clean up our flags
117 tokenizer.tokenize_state.seen = false;
118
119 // Start looking for the metadata!
120 tokenizer.attempt(
121 State::Next(StateName::BlockQuoteContBeforeNoCallout),
122 State::Next(StateName::BlockQuoteContAfter),
123 );
124 return State::Next(StateName::ObsidianBlockQuoteCalloutStart);
125 }
126
127 State::Next(StateName::BlockQuoteContAfter)
128 }
129 _ => State::Nok,
130 }
131}
132
133// Just the original version of the previous function, without looking for
134// Obsidian callout blocks...
135pub fn cont_before_no_callout(tokenizer: &mut Tokenizer) -> State {
136 match tokenizer.current {
137 Some(b'>') => {
138 tokenizer.enter(Name::BlockQuotePrefix);
139 tokenizer.enter(Name::BlockQuoteMarker);
140 tokenizer.consume();
141 tokenizer.exit(Name::BlockQuoteMarker);
142
143 State::Next(StateName::BlockQuoteContAfter)
144 }
145 _ => State::Nok,
146 }
147}
148
149/// After `>`, before optional whitespace.
150///
151/// ```markdown
152/// > | > a
153/// ^
154/// > | >b
155/// ^
156/// ```
157pub fn cont_after(tokenizer: &mut Tokenizer) -> State {
158 if let Some(b'\t' | b' ') = tokenizer.current {
159 tokenizer.enter(Name::SpaceOrTab);
160 tokenizer.consume();
161 tokenizer.exit(Name::SpaceOrTab);
162 }
163
164 tokenizer.exit(Name::BlockQuotePrefix);
165 State::Ok
166}
167
168pub fn obs_callout_start(tokenizer: &mut Tokenizer) -> State {
169 match tokenizer.current {
170 // Eat all whitespace before a callout
171 Some(b' ' | b'\t') => {
172 tokenizer.attempt(
173 State::Next(StateName::ObsidianBlockQuoteCalloutStart),
174 // State::Next(StateName::BlockQuoteContAfter),
175 State::Nok,
176 );
177 State::Retry(space_or_tab_min_max(
178 tokenizer,
179 1,
180 if tokenizer.parse_state.options.constructs.code_indented {
181 TAB_SIZE - 1
182 } else {
183 usize::MAX
184 },
185 ))
186 }
187 Some(b'[') => {
188 // We reuse state to mark that we've seen a possible callout start
189 tokenizer.tokenize_state.seen = true;
190 tokenizer.consume();
191 State::Next(StateName::ObsidianBlockQuoteCalloutStart)
192 }
193 Some(b'!') if tokenizer.tokenize_state.seen => {
194 // We're inside a callout now
195 tokenizer.enter(Name::ObsidianBlockQuoteCallout);
196 tokenizer.enter(Name::ObsidianBlockQuoteCalloutType);
197 tokenizer.consume();
198
199 // We reuse this state variable until we've found a closing ]
200 tokenizer.tokenize_state.seen = true;
201 State::Next(StateName::ObsidianBlockQuoteCalloutInner)
202 }
203 _ => State::Nok,
204 }
205}
206
207/// ```markdown
208/// > [!blahblah]
209/// ^
210/// ```
211pub fn obs_callout_inner(tokenizer: &mut Tokenizer) -> State {
212 match tokenizer.current {
213 Some(b']') if tokenizer.tokenize_state.seen => {
214 // We've seen the end of the callout declaration
215 tokenizer.tokenize_state.seen = false;
216
217 // TODO: Optionally check for - character
218 tokenizer.consume();
219 tokenizer.exit(Name::ObsidianBlockQuoteCalloutType);
220 tokenizer.enter(Name::ObsidianText);
221 State::Next(StateName::ObsidianBlockQuoteCalloutInner)
222 }
223 Some(b'\n') => {
224 // Resume normal flow!
225 tokenizer.consume();
226 tokenizer.exit(Name::ObsidianText);
227 tokenizer.exit(Name::ObsidianBlockQuoteCallout);
228 tokenizer.tokenize_state.seen = false;
229 // State::Next(StateName::BlockQuoteContBefore)
230 State::Ok
231 }
232 _ if tokenizer.tokenize_state.seen => {
233 tokenizer.consume();
234 State::Next(StateName::ObsidianBlockQuoteCalloutInner)
235 }
236 _ => {
237 // We just save whatever this is as text
238 tokenizer.consume();
239 State::Next(StateName::ObsidianBlockQuoteCalloutInner)
240 }
241 }
242}