// Markdown parser fork with extended syntax for personal use.
1//! Heading (atx) occurs in the [flow][] content type.
2//!
3//! ## Grammar
4//!
5//! Heading (atx) forms with the following BNF
6//! (<small>see [construct][crate::construct] for character groups</small>):
7//!
8//! ```bnf
9//! heading_atx ::= 1*6'#' [ 1*space_or_tab line [ 1*space_or_tab 1*'#' ] ] *space_or_tab
10//! ```
11//!
12//! As this construct occurs in flow, like all flow constructs, it must be
13//! followed by an eol (line ending) or eof (end of file).
14//!
15//! `CommonMark` introduced the requirement on whitespace existing after the
16//! opening sequence and before text.
17//! In older markdown versions, this was not required, and headings would form
18//! without it.
19//!
20//! In markdown, it is also possible to create headings with a
21//! [heading (setext)][heading_setext] construct.
22//! The benefit of setext headings is that their text can include line endings,
23//! and by extensions also hard breaks (e.g., with
24//! [hard break (escape)][hard_break_escape]).
25//! However, their limit is that they cannot form `<h3>` through `<h6>`
26//! headings.
27//!
28//! > 🏛 **Background**: the word *setext* originates from a small markup
29//! > language by Ian Feldman from 1991.
30//! > See [*§ Setext* on Wikipedia][wiki_setext] for more info.
31//! > The word *atx* originates from a tiny markup language by Aaron Swartz
32//! > from 2002.
33//! > See [*§ atx, the true structured text format* on `aaronsw.com`][atx] for
34//! > more info.
35//!
36//! ## HTML
37//!
38//! Headings in markdown relate to the `<h1>` through `<h6>` elements in HTML.
39//! See [*§ 4.3.6 The `h1`, `h2`, `h3`, `h4`, `h5`, and `h6` elements* in the
40//! HTML spec][html] for more info.
41//!
42//! ## Recommendation
43//!
44//! Always use heading (atx), never heading (setext).
45//!
46//! ## Tokens
47//!
48//! * [`HeadingAtx`][Name::HeadingAtx]
49//! * [`HeadingAtxSequence`][Name::HeadingAtxSequence]
50//! * [`HeadingAtxText`][Name::HeadingAtxText]
51//! * [`SpaceOrTab`][Name::SpaceOrTab]
52//!
53//! ## References
54//!
55//! * [`heading-atx.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/heading-atx.js)
56//! * [*§ 4.2 ATX headings* in `CommonMark`](https://spec.commonmark.org/0.31/#atx-headings)
57//!
58//! [flow]: crate::construct::flow
59//! [heading_setext]: crate::construct::heading_setext
60//! [hard_break_escape]: crate::construct::hard_break_escape
61//! [html]: https://html.spec.whatwg.org/multipage/sections.html#the-h1,-h2,-h3,-h4,-h5,-and-h6-elements
62//! [wiki_setext]: https://en.wikipedia.org/wiki/Setext
63//! [atx]: http://www.aaronsw.com/2002/atx/
64
65use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
66use crate::event::{Content, Event, Kind, Link, Name};
67use crate::resolve::Name as ResolveName;
68use crate::state::{Name as StateName, State};
69use crate::subtokenize::Subresult;
70use crate::tokenizer::Tokenizer;
71use crate::util::constant::{HEADING_ATX_OPENING_FENCE_SIZE_MAX, TAB_SIZE};
72use alloc::vec;
73
74/// Start of a heading (atx).
75///
76/// ```markdown
77/// > | ## aa
78/// ^
79/// ```
80pub fn start(tokenizer: &mut Tokenizer) -> State {
81 if tokenizer.parse_state.options.constructs.heading_atx {
82 tokenizer.enter(Name::HeadingAtx);
83 if matches!(tokenizer.current, Some(b'\t' | b' ')) {
84 tokenizer.attempt(State::Next(StateName::HeadingAtxBefore), State::Nok);
85 State::Retry(space_or_tab_min_max(
86 tokenizer,
87 0,
88 if tokenizer.parse_state.options.constructs.code_indented {
89 TAB_SIZE - 1
90 } else {
91 usize::MAX
92 },
93 ))
94 } else {
95 State::Retry(StateName::HeadingAtxBefore)
96 }
97 } else {
98 State::Nok
99 }
100}
101
102/// After optional whitespace, at `#`.
103///
104/// ```markdown
105/// > | ## aa
106/// ^
107/// ```
108pub fn before(tokenizer: &mut Tokenizer) -> State {
109 if Some(b'#') == tokenizer.current {
110 tokenizer.enter(Name::HeadingAtxSequence);
111 State::Retry(StateName::HeadingAtxSequenceOpen)
112 } else {
113 State::Nok
114 }
115}
116
117/// In opening sequence.
118///
119/// ```markdown
120/// > | ## aa
121/// ^
122/// ```
123pub fn sequence_open(tokenizer: &mut Tokenizer) -> State {
124 if tokenizer.current == Some(b'#')
125 && tokenizer.tokenize_state.size < HEADING_ATX_OPENING_FENCE_SIZE_MAX
126 {
127 tokenizer.tokenize_state.size += 1;
128 tokenizer.consume();
129 State::Next(StateName::HeadingAtxSequenceOpen)
130 }
131 // Always at least one `#`.
132 else if matches!(tokenizer.current, None | Some(b'\t' | b'\n' | b' ')) {
133 tokenizer.tokenize_state.size = 0;
134 tokenizer.exit(Name::HeadingAtxSequence);
135 State::Retry(StateName::HeadingAtxAtBreak)
136 } else {
137 tokenizer.tokenize_state.size = 0;
138 State::Nok
139 }
140}
141
142/// After something, before something else.
143///
144/// ```markdown
145/// > | ## aa
146/// ^
147/// ```
148pub fn at_break(tokenizer: &mut Tokenizer) -> State {
149 match tokenizer.current {
150 None | Some(b'\n') => {
151 tokenizer.exit(Name::HeadingAtx);
152 tokenizer.register_resolver(ResolveName::HeadingAtx);
153 // Feel free to interrupt.
154 tokenizer.interrupt = false;
155 State::Ok
156 }
157 Some(b'\t' | b' ') => {
158 tokenizer.attempt(State::Next(StateName::HeadingAtxAtBreak), State::Nok);
159 State::Retry(space_or_tab(tokenizer))
160 }
161 Some(b'#') => {
162 tokenizer.enter(Name::HeadingAtxSequence);
163 State::Retry(StateName::HeadingAtxSequenceFurther)
164 }
165 Some(_) => {
166 tokenizer.enter_link(
167 Name::Data,
168 Link {
169 previous: None,
170 next: None,
171 content: Content::Text,
172 },
173 );
174 State::Retry(StateName::HeadingAtxData)
175 }
176 }
177}
178
179/// In further sequence (after whitespace).
180///
181/// Could be normal “visible” hashes in the heading or a final sequence.
182///
183/// ```markdown
184/// > | ## aa ##
185/// ^
186/// ```
187pub fn sequence_further(tokenizer: &mut Tokenizer) -> State {
188 if let Some(b'#') = tokenizer.current {
189 tokenizer.consume();
190 State::Next(StateName::HeadingAtxSequenceFurther)
191 } else {
192 tokenizer.exit(Name::HeadingAtxSequence);
193 State::Retry(StateName::HeadingAtxAtBreak)
194 }
195}
196
197/// In text.
198///
199/// ```markdown
200/// > | ## aa
201/// ^
202/// ```
203pub fn data(tokenizer: &mut Tokenizer) -> State {
204 match tokenizer.current {
205 // Note: `#` for closing sequence must be preceded by whitespace, otherwise it’s just text.
206 None | Some(b'\t' | b'\n' | b' ') => {
207 tokenizer.exit(Name::Data);
208 State::Retry(StateName::HeadingAtxAtBreak)
209 }
210 _ => {
211 tokenizer.consume();
212 State::Next(StateName::HeadingAtxData)
213 }
214 }
215}
216
/// Resolve heading (atx).
///
/// Walks the whole event list once and, for each heading, wraps the span
/// from its first `Data` enter to its last `Data` exit in a
/// `HeadingAtxText` group, deleting the events in between those two
/// (whitespace and `#` sequences inside the text) via queued map edits.
pub fn resolve(tokenizer: &mut Tokenizer) -> Option<Subresult> {
    let mut index = 0;
    // Whether we are currently between a `HeadingAtx` enter and its exit.
    let mut heading_inside = false;
    // Index of the first `Data` enter event inside the current heading.
    let mut data_start: Option<usize> = None;
    // Index of the most recent `Data` exit event inside the current heading.
    let mut data_end: Option<usize> = None;

    while index < tokenizer.events.len() {
        let event = &tokenizer.events[index];

        if event.name == Name::HeadingAtx {
            if event.kind == Kind::Enter {
                heading_inside = true;
            } else {
                // Heading exit: apply the collected text span, if any.
                if let Some(start) = data_start {
                    // If `start` is some, `end` is too.
                    let end = data_end.unwrap();

                    // Inject a `HeadingAtxText` enter just before the first
                    // data event, reusing its point.
                    tokenizer.map.add(
                        start,
                        0,
                        vec![Event {
                            kind: Kind::Enter,
                            name: Name::HeadingAtxText,
                            point: tokenizer.events[start].point.clone(),
                            link: None,
                        }],
                    );

                    // Remove everything between the start and the end.
                    tokenizer.map.add(start + 1, end - start - 1, vec![]);

                    // Inject a `HeadingAtxText` exit just after the last
                    // data event, reusing its point.
                    tokenizer.map.add(
                        end + 1,
                        0,
                        vec![Event {
                            kind: Kind::Exit,
                            name: Name::HeadingAtxText,
                            point: tokenizer.events[end].point.clone(),
                            link: None,
                        }],
                    );
                }

                // Reset for the next heading in the document.
                heading_inside = false;
                data_start = None;
                data_end = None;
            }
        } else if heading_inside && event.name == Name::Data {
            if event.kind == Kind::Enter {
                // Only the first data enter marks the start of the text.
                if data_start.is_none() {
                    data_start = Some(index);
                }
            } else {
                // The last data exit seen marks the end of the text.
                data_end = Some(index);
            }
        }

        index += 1;
    }

    // Apply all queued edits to the actual event list.
    tokenizer.map.consume(&mut tokenizer.events);
    None
}