Markdown parser fork with extended syntax for personal use.
1//! Space or tab (eol) occurs in [destination][], [label][], and [title][].
2//!
3//! ## Grammar
4//!
5//! Space or tab (eol) forms with the following BNF
6//! (<small>see [construct][crate::construct] for character groups</small>):
7//!
8//! ```bnf
9//! space_or_tab_eol ::= 1*space_or_tab | *space_or_tab eol *space_or_tab
10//! ```
11//!
12//! Importantly, this allows one line ending, but not blank lines.
13//!
14//! ## References
15//!
16//! * [`micromark-factory-space/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-space/dev/index.js)
17//!
18//! [destination]: crate::construct::partial_destination
19//! [label]: crate::construct::partial_label
20//! [title]: crate::construct::partial_title
21
22use crate::construct::partial_space_or_tab::{
23 space_or_tab_with_options, Options as SpaceOrTabOptions,
24};
25use crate::event::{Content, Link, Name};
26use crate::state::{Name as StateName, State};
27use crate::subtokenize::link;
28use crate::tokenizer::Tokenizer;
29
30/// Configuration.
31#[derive(Debug)]
32pub struct Options {
33 /// Connect this whitespace to the previous.
34 pub connect: bool,
35 /// Embedded content type to use.
36 pub content: Option<Content>,
37}
38
39/// `space_or_tab_eol`
40pub fn space_or_tab_eol(tokenizer: &mut Tokenizer) -> StateName {
41 space_or_tab_eol_with_options(
42 tokenizer,
43 Options {
44 content: None,
45 connect: false,
46 },
47 )
48}
49
50/// `space_or_tab_eol`, with the given options.
51pub fn space_or_tab_eol_with_options(tokenizer: &mut Tokenizer, options: Options) -> StateName {
52 tokenizer.tokenize_state.space_or_tab_eol_content = options.content;
53 tokenizer.tokenize_state.space_or_tab_eol_connect = options.connect;
54 StateName::SpaceOrTabEolStart
55}
56
57/// Start of whitespace with at most one eol.
58///
59/// ```markdown
60/// > | a␠␠b
61/// ^
62/// > | a␠␠␊
63/// ^
64/// | ␠␠b
65/// ```
66pub fn start(tokenizer: &mut Tokenizer) -> State {
67 match tokenizer.current {
68 Some(b'\t' | b' ') => {
69 tokenizer.attempt(
70 State::Next(StateName::SpaceOrTabEolAfterFirst),
71 State::Next(StateName::SpaceOrTabEolAtEol),
72 );
73
74 State::Retry(space_or_tab_with_options(
75 tokenizer,
76 SpaceOrTabOptions {
77 kind: Name::SpaceOrTab,
78 min: 1,
79 max: usize::MAX,
80 content: tokenizer.tokenize_state.space_or_tab_eol_content.clone(),
81 connect: tokenizer.tokenize_state.space_or_tab_eol_connect,
82 },
83 ))
84 }
85 _ => State::Retry(StateName::SpaceOrTabEolAtEol),
86 }
87}
88
89/// After initial whitespace, at optional eol.
90///
91/// ```markdown
92/// > | a␠␠b
93/// ^
94/// > | a␠␠␊
95/// ^
96/// | ␠␠b
97/// ```
98pub fn after_first(tokenizer: &mut Tokenizer) -> State {
99 tokenizer.tokenize_state.space_or_tab_eol_ok = true;
100 debug_assert!(
101 tokenizer.tokenize_state.space_or_tab_eol_content.is_none(),
102 "expected no content"
103 );
104 // If the above ever errors, set `tokenizer.tokenize_state.space_or_tab_eol_connect: true` in that case.
105 State::Retry(StateName::SpaceOrTabEolAtEol)
106}
107
108/// After optional whitespace, at eol.
109///
110/// ```markdown
111/// > | a␠␠b
112/// ^
113/// > | a␠␠␊
114/// ^
115/// | ␠␠b
116/// > | a␊
117/// ^
118/// | ␠␠b
119/// ```
120pub fn at_eol(tokenizer: &mut Tokenizer) -> State {
121 if let Some(b'\n') = tokenizer.current {
122 if let Some(ref content) = tokenizer.tokenize_state.space_or_tab_eol_content {
123 tokenizer.enter_link(
124 Name::LineEnding,
125 Link {
126 previous: None,
127 next: None,
128 content: content.clone(),
129 },
130 );
131 } else {
132 tokenizer.enter(Name::LineEnding);
133 }
134
135 if tokenizer.tokenize_state.space_or_tab_eol_connect {
136 let index = tokenizer.events.len() - 1;
137 link(&mut tokenizer.events, index);
138 } else if tokenizer.tokenize_state.space_or_tab_eol_content.is_some() {
139 tokenizer.tokenize_state.space_or_tab_eol_connect = true;
140 }
141
142 tokenizer.consume();
143 tokenizer.exit(Name::LineEnding);
144 State::Next(StateName::SpaceOrTabEolAfterEol)
145 } else {
146 let ok = tokenizer.tokenize_state.space_or_tab_eol_ok;
147 tokenizer.tokenize_state.space_or_tab_eol_content = None;
148 tokenizer.tokenize_state.space_or_tab_eol_connect = false;
149 tokenizer.tokenize_state.space_or_tab_eol_ok = false;
150 if ok {
151 State::Ok
152 } else {
153 State::Nok
154 }
155 }
156}
157
158/// After eol.
159///
160/// ```markdown
161/// | a␠␠␊
162/// > | ␠␠b
163/// ^
164/// | a␊
165/// > | ␠␠b
166/// ^
167/// ```
168pub fn after_eol(tokenizer: &mut Tokenizer) -> State {
169 if matches!(tokenizer.current, Some(b'\t' | b' ')) {
170 tokenizer.attempt(State::Next(StateName::SpaceOrTabEolAfterMore), State::Nok);
171 State::Retry(space_or_tab_with_options(
172 tokenizer,
173 SpaceOrTabOptions {
174 kind: Name::SpaceOrTab,
175 min: 1,
176 max: usize::MAX,
177 content: tokenizer.tokenize_state.space_or_tab_eol_content.clone(),
178 connect: tokenizer.tokenize_state.space_or_tab_eol_connect,
179 },
180 ))
181 } else {
182 State::Retry(StateName::SpaceOrTabEolAfterMore)
183 }
184}
185
186/// After optional final whitespace.
187///
188/// ```markdown
189/// | a␠␠␊
190/// > | ␠␠b
191/// ^
192/// | a␊
193/// > | ␠␠b
194/// ^
195/// ```
196pub fn after_more(tokenizer: &mut Tokenizer) -> State {
197 debug_assert!(
198 !matches!(tokenizer.current, None | Some(b'\n')),
199 "did not expect blank line"
200 );
201 // If the above ever starts erroring, gracefully `State::Nok` on it.
202 // Currently it doesn’t happen, as we only use this in content, which does
203 // not allow blank lines.
204 tokenizer.tokenize_state.space_or_tab_eol_content = None;
205 tokenizer.tokenize_state.space_or_tab_eol_connect = false;
206 tokenizer.tokenize_state.space_or_tab_eol_ok = false;
207 State::Ok
208}