//! Markdown parser fork with extended syntax for personal use.
1//! Label occurs in [definition][] and [label end][label_end].
2//!
3//! ## Grammar
4//!
5//! Label forms with the following BNF
6//! (<small>see [construct][crate::construct] for character groups</small>):
7//!
8//! ```bnf
9//! ; Restriction: maximum `999` codes allowed between brackets.
10//! ; Restriction: no blank lines.
11//! ; Restriction: at least 1 `text` byte must exist.
12//! label ::= '[' *(label_byte | label_escape) ']'
13//! label_byte ::= code - '[' - '\\' - ']'
14//! label_escape ::= '\\' ['[' | '\\' | ']']
15//! ```
16//!
17//! The maximum allowed size of the label, without the brackets, is `999`
18//! (inclusive), which is defined in
19//! [`LINK_REFERENCE_SIZE_MAX`][].
20//!
21//! Labels can contain line endings and whitespace, but they are not allowed to
22//! contain blank lines, and they must not be blank themselves.
23//!
24//! The label is interpreted as the [string][] content type.
25//! That means that [character escapes][character_escape] and
26//! [character references][character_reference] are allowed.
27//!
28//! > 👉 **Note**: this label relates to, but is not, the initial “label” of
//! > what is known as a reference in markdown:
30//! >
31//! > | Kind | Link | Image |
32//! > | --------- | -------- | --------- |
33//! > | Shortcut | `[x]` | `![x]` |
34//! > | Collapsed | `[x][]` | `![x][]` |
35//! > | Full | `[x][y]` | `![x][y]` |
36//! >
37//! > The 6 above things are references, in the three kinds they come in, as
38//! > links and images.
39//! > The label that this module focusses on is only the thing that contains
40//! > `y`.
41//! >
42//! > The thing that contains `x` is not a single thing when parsing markdown,
//! > but instead consists of an opening
44//! > ([label start (image)][label_start_image] or
45//! > [label start (link)][label_start_link]) and a closing
46//! > ([label end][label_end]), so as to allow further phrasing such as
47//! > [code (text)][raw_text] or [attention][].
48//!
49//! ## References
50//!
51//! * [`micromark-factory-label/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-label/dev/index.js)
52//!
53//! [definition]: crate::construct::definition
54//! [string]: crate::construct::string
55//! [attention]: crate::construct::attention
56//! [character_escape]: crate::construct::character_escape
57//! [character_reference]: crate::construct::character_reference
58//! [label_start_image]: crate::construct::label_start_image
59//! [label_start_link]: crate::construct::label_start_link
60//! [label_end]: crate::construct::label_end
61//! [raw_text]: crate::construct::raw_text
62//! [link_reference_size_max]: crate::util::constant::LINK_REFERENCE_SIZE_MAX
63
64use crate::construct::partial_space_or_tab_eol::{space_or_tab_eol_with_options, Options};
65use crate::event::{Content, Link, Name};
66use crate::state::{Name as StateName, State};
67use crate::subtokenize::link;
68use crate::tokenizer::Tokenizer;
69use crate::util::constant::LINK_REFERENCE_SIZE_MAX;
70
71/// Start of label.
72///
73/// ```markdown
74/// > | [a]
75/// ^
76/// ```
77pub fn start(tokenizer: &mut Tokenizer) -> State {
78 debug_assert_eq!(tokenizer.current, Some(b'['), "expected `[`");
79 tokenizer.enter(tokenizer.tokenize_state.token_1.clone());
80 tokenizer.enter(tokenizer.tokenize_state.token_2.clone());
81 tokenizer.consume();
82 tokenizer.exit(tokenizer.tokenize_state.token_2.clone());
83 tokenizer.enter(tokenizer.tokenize_state.token_3.clone());
84 State::Next(StateName::LabelAtBreak)
85}
86
/// In label, at something, before something else.
///
/// ```markdown
/// > | [a]
///      ^
/// ```
pub fn at_break(tokenizer: &mut Tokenizer) -> State {
    // Fail when the label is too big, at EOF or another `[` (the grammar
    // forbids `[` inside a label), or at a `]` before any non-blank byte was
    // seen (labels must not be blank).
    if tokenizer.tokenize_state.size > LINK_REFERENCE_SIZE_MAX
        || matches!(tokenizer.current, None | Some(b'['))
        || (matches!(tokenizer.current, Some(b']')) && !tokenizer.tokenize_state.seen)
    {
        State::Retry(StateName::LabelNok)
    } else {
        match tokenizer.current {
            // Line ending: attempt `space_or_tab_eol`; if that fails (e.g. a
            // blank line), the whole label fails via `LabelNok`.
            Some(b'\n') => {
                tokenizer.attempt(
                    State::Next(StateName::LabelEolAfter),
                    State::Next(StateName::LabelNok),
                );
                State::Retry(space_or_tab_eol_with_options(
                    tokenizer,
                    Options {
                        content: Some(Content::String),
                        // Only connect the whitespace events to earlier
                        // string content once some content was emitted.
                        connect: tokenizer.tokenize_state.connect,
                    },
                ))
            }
            // Closing bracket: emit the closing marker, finish the label, and
            // reset the shared state for the next use of this partial.
            Some(b']') => {
                tokenizer.exit(tokenizer.tokenize_state.token_3.clone());
                tokenizer.enter(tokenizer.tokenize_state.token_2.clone());
                tokenizer.consume();
                tokenizer.exit(tokenizer.tokenize_state.token_2.clone());
                tokenizer.exit(tokenizer.tokenize_state.token_1.clone());
                tokenizer.tokenize_state.connect = false;
                tokenizer.tokenize_state.seen = false;
                tokenizer.tokenize_state.size = 0;
                State::Ok
            }
            // Anything else: data, parsed later as `string` content.
            _ => {
                tokenizer.enter_link(
                    Name::Data,
                    Link {
                        previous: None,
                        next: None,
                        content: Content::String,
                    },
                );

                if tokenizer.tokenize_state.connect {
                    // Chain this data event to the previous string content.
                    let index = tokenizer.events.len() - 1;
                    link(&mut tokenizer.events, index);
                } else {
                    // First content: subsequent events connect to this one.
                    tokenizer.tokenize_state.connect = true;
                }

                State::Retry(StateName::LabelInside)
            }
        }
    }
}
147
148/// In label, after whitespace.
149///
150/// ```markdown
151/// | [a␊
152/// > | b]
153/// ^
154/// ```
155pub fn eol_after(tokenizer: &mut Tokenizer) -> State {
156 tokenizer.tokenize_state.connect = true;
157 State::Retry(StateName::LabelAtBreak)
158}
159
160/// In label, on something disallowed.
161///
162/// ```markdown
163/// > | []
164/// ^
165/// ```
166pub fn nok(tokenizer: &mut Tokenizer) -> State {
167 tokenizer.tokenize_state.connect = false;
168 tokenizer.tokenize_state.seen = false;
169 tokenizer.tokenize_state.size = 0;
170 State::Nok
171}
172
173/// In label, in text.
174///
175/// ```markdown
176/// > | [a]
177/// ^
178/// ```
179pub fn inside(tokenizer: &mut Tokenizer) -> State {
180 match tokenizer.current {
181 None | Some(b'\n' | b'[' | b']') => {
182 tokenizer.exit(Name::Data);
183 State::Retry(StateName::LabelAtBreak)
184 }
185 Some(byte) => {
186 if tokenizer.tokenize_state.size > LINK_REFERENCE_SIZE_MAX {
187 tokenizer.exit(Name::Data);
188 State::Retry(StateName::LabelAtBreak)
189 } else {
190 tokenizer.consume();
191 tokenizer.tokenize_state.size += 1;
192 if !tokenizer.tokenize_state.seen && !matches!(byte, b'\t' | b' ') {
193 tokenizer.tokenize_state.seen = true;
194 }
195 State::Next(if matches!(byte, b'\\') {
196 StateName::LabelEscape
197 } else {
198 StateName::LabelInside
199 })
200 }
201 }
202 }
203}
204
205/// After `\`, at a special character.
206///
207/// ```markdown
208/// > | [a\*a]
209/// ^
210/// ```
211pub fn escape(tokenizer: &mut Tokenizer) -> State {
212 match tokenizer.current {
213 Some(b'[' | b'\\' | b']') => {
214 tokenizer.consume();
215 tokenizer.tokenize_state.size += 1;
216 State::Next(StateName::LabelInside)
217 }
218 _ => State::Retry(StateName::LabelInside),
219 }
220}