// Markdown parser fork with extended syntax for personal use.
1//! List item occurs in the [document][] content type.
2//!
3//! ## Grammar
4//!
5//! List item forms with the following BNF
6//! (<small>see [construct][crate::construct] for character groups</small>):
7//!
8//! ```bnf
9//! ; Restriction: if there is no space after the marker, the start must be followed by an `eol`.
10//! ; Restriction: if the first line after the marker is not blank and starts with `5(space_or_tab)`,
11//! ; only the first `space_or_tab` is part of the start.
12//! list_item_start ::= '*' | '+' | '-' | 1*9(ascii_decimal) ('.' | ')') [1*4 space_or_tab]
13//!
14//! ; Restriction: blank line allowed, except when this is the first continuation after a blank start.
15//! ; Restriction: if not blank, the line must be indented, exactly `n` times.
16//! list_item_cont ::= [n(space_or_tab)]
17//! ```
18//!
19//! Further lines that are not prefixed with `list_item_cont` cause the list
20//! item to be exited, except when those lines are lazy continuation or blank.
21//! Like so many things in markdown, list items too are complex.
22//! See [*§ Phase 1: block structure* in `CommonMark`][commonmark_block] for
23//! more on parsing details.
24//!
25//! As list item is a container, it takes several bytes from the start of the
26//! line, while the rest of the line includes more containers or flow.
27//!
28//! ## HTML
29//!
30//! List item relates to the `<li>`, `<ol>`, and `<ul>` elements in HTML.
31//! See [*§ 4.4.8 The `li` element*][html_li],
32//! [*§ 4.4.5 The `ol` element*][html_ol], and
33//! [*§ 4.4.7 The `ul` element*][html_ul] in the HTML spec for more info.
34//!
35//! ## Recommendation
36//!
37//! Use a single space after a marker.
38//! Never use lazy continuation.
39//!
40//! ## Tokens
41//!
42//! * [`ListItem`][Name::ListItem]
43//! * [`ListItemMarker`][Name::ListItemMarker]
44//! * [`ListItemPrefix`][Name::ListItemPrefix]
45//! * [`ListItemValue`][Name::ListItemValue]
46//! * [`ListOrdered`][Name::ListOrdered]
47//! * [`ListUnordered`][Name::ListUnordered]
48//!
49//! ## References
50//!
51//! * [`list.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/list.js)
52//! * [*§ 5.2 List items* in `CommonMark`](https://spec.commonmark.org/0.31/#list-items)
53//! * [*§ 5.3 Lists* in `CommonMark`](https://spec.commonmark.org/0.31/#lists)
54//!
55//! [document]: crate::construct::document
56//! [html_li]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-li-element
57//! [html_ol]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-ol-element
58//! [html_ul]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-ul-element
59//! [commonmark_block]: https://spec.commonmark.org/0.31/#phase-1-block-structure
60
61use crate::construct::partial_space_or_tab::space_or_tab_min_max;
62use crate::event::{Kind, Name};
63use crate::resolve::Name as ResolveName;
64use crate::state::{Name as StateName, State};
65use crate::subtokenize::Subresult;
66use crate::tokenizer::Tokenizer;
67use crate::util::{
68 constant::{LIST_ITEM_VALUE_SIZE_MAX, TAB_SIZE},
69 skip,
70 slice::{Position, Slice},
71};
72use alloc::{vec, vec::Vec};
73
74/// Start of list item.
75///
76/// ```markdown
77/// > | * a
78/// ^
79/// ```
80pub fn start(tokenizer: &mut Tokenizer) -> State {
81 if tokenizer.parse_state.options.constructs.list_item {
82 tokenizer.enter(Name::ListItem);
83
84 if matches!(tokenizer.current, Some(b'\t' | b' ')) {
85 tokenizer.attempt(State::Next(StateName::ListItemBefore), State::Nok);
86 State::Retry(space_or_tab_min_max(
87 tokenizer,
88 0,
89 if tokenizer.parse_state.options.constructs.code_indented {
90 TAB_SIZE - 1
91 } else {
92 usize::MAX
93 },
94 ))
95 } else {
96 State::Retry(StateName::ListItemBefore)
97 }
98 } else {
99 State::Nok
100 }
101}
102
103/// After optional whitespace, at list item prefix.
104///
105/// ```markdown
106/// > | * a
107/// ^
108/// ```
109pub fn before(tokenizer: &mut Tokenizer) -> State {
110 // Unordered.
111 if matches!(tokenizer.current, Some(b'*' | b'-')) {
112 tokenizer.check(State::Nok, State::Next(StateName::ListItemBeforeUnordered));
113 State::Retry(StateName::ThematicBreakStart)
114 } else if tokenizer.current == Some(b'+') {
115 State::Retry(StateName::ListItemBeforeUnordered)
116 }
117 // Ordered.
118 else if tokenizer.current == Some(b'1')
119 || (matches!(tokenizer.current, Some(b'0'..=b'9')) && !tokenizer.interrupt)
120 {
121 State::Retry(StateName::ListItemBeforeOrdered)
122 } else {
123 State::Nok
124 }
125}
126
/// At unordered list item marker.
///
/// The line is not a thematic break.
///
/// ```markdown
/// > | * a
///     ^
/// ```
pub fn before_unordered(tokenizer: &mut Tokenizer) -> State {
    // The whole prefix (marker plus any trailing whitespace) is wrapped in
    // one `ListItemPrefix` token; it is closed later in `after`.
    tokenizer.enter(Name::ListItemPrefix);
    State::Retry(StateName::ListItemMarker)
}
139
/// At ordered list item value.
///
/// ```markdown
/// > | 1. a
///     ^
/// ```
pub fn before_ordered(tokenizer: &mut Tokenizer) -> State {
    tokenizer.enter(Name::ListItemPrefix);
    // Ordered items additionally carry a numeric value before the marker.
    tokenizer.enter(Name::ListItemValue);
    State::Retry(StateName::ListItemValue)
}
151
152/// In ordered list item value.
153///
154/// ```markdown
155/// > | 1. a
156/// ^
157/// ```
158pub fn value(tokenizer: &mut Tokenizer) -> State {
159 if matches!(tokenizer.current, Some(b'.' | b')'))
160 && (!tokenizer.interrupt || tokenizer.tokenize_state.size < 2)
161 {
162 tokenizer.exit(Name::ListItemValue);
163 State::Retry(StateName::ListItemMarker)
164 } else if matches!(tokenizer.current, Some(b'0'..=b'9'))
165 && tokenizer.tokenize_state.size + 1 < LIST_ITEM_VALUE_SIZE_MAX
166 {
167 tokenizer.tokenize_state.size += 1;
168 tokenizer.consume();
169 State::Next(StateName::ListItemValue)
170 } else {
171 tokenizer.tokenize_state.size = 0;
172 State::Nok
173 }
174}
175
/// At list item marker.
///
/// ```markdown
/// > | * a
///     ^
/// > | 1. b
///      ^
/// ```
pub fn marker(tokenizer: &mut Tokenizer) -> State {
    // The marker is always exactly one byte (`*`, `+`, `-`, `.`, or `)`).
    tokenizer.enter(Name::ListItemMarker);
    tokenizer.consume();
    tokenizer.exit(Name::ListItemMarker);
    State::Next(StateName::ListItemMarkerAfter)
}
190
/// After list item marker.
///
/// ```markdown
/// > | * a
///      ^
/// > | 1. b
///       ^
/// ```
pub fn marker_after(tokenizer: &mut Tokenizer) -> State {
    // Tentatively flag the item as starting blank; `marker_after_filled`
    // resets the flag when the rest of the line turns out not to be blank.
    tokenizer.tokenize_state.size = 1;
    tokenizer.check(
        State::Next(StateName::ListItemAfter),
        State::Next(StateName::ListItemMarkerAfterFilled),
    );
    State::Retry(StateName::BlankLineStart)
}
207
/// After list item marker.
///
/// The marker is not followed by a blank line.
///
/// ```markdown
/// > | * a
///      ^
/// ```
pub fn marker_after_filled(tokenizer: &mut Tokenizer) -> State {
    // Not blank after all: clear the flag set in `marker_after`.
    tokenizer.tokenize_state.size = 0;

    // Attempt to parse up to the largest allowed indent, `nok` if there is more whitespace.
    tokenizer.attempt(
        State::Next(StateName::ListItemAfter),
        State::Next(StateName::ListItemPrefixOther),
    );
    State::Retry(StateName::ListItemWhitespace)
}
226
/// After marker, at whitespace.
///
/// ```markdown
/// > | * a
///      ^
/// ```
pub fn whitespace(tokenizer: &mut Tokenizer) -> State {
    // Between 1 and `TAB_SIZE` whitespace characters may belong to the prefix;
    // `whitespace_after` rejects the case where even more whitespace follows.
    tokenizer.attempt(State::Next(StateName::ListItemWhitespaceAfter), State::Nok);
    State::Retry(space_or_tab_min_max(tokenizer, 1, TAB_SIZE))
}
237
238/// After acceptable whitespace.
239///
240/// ```markdown
241/// > | * a
242/// ^
243/// ```
244pub fn whitespace_after(tokenizer: &mut Tokenizer) -> State {
245 if let Some(b'\t' | b' ') = tokenizer.current {
246 State::Nok
247 } else {
248 State::Ok
249 }
250}
251
252/// After marker, followed by no indent or more indent that needed.
253///
254/// ```markdown
255/// > | * a
256/// ^
257/// ```
258pub fn prefix_other(tokenizer: &mut Tokenizer) -> State {
259 match tokenizer.current {
260 Some(b'\t' | b' ') => {
261 tokenizer.enter(Name::SpaceOrTab);
262 tokenizer.consume();
263 tokenizer.exit(Name::SpaceOrTab);
264 State::Next(StateName::ListItemAfter)
265 }
266 _ => State::Nok,
267 }
268}
269
/// After list item prefix.
///
/// ```markdown
/// > | * a
///       ^
/// ```
pub fn after(tokenizer: &mut Tokenizer) -> State {
    // `size == 1` was set in `marker_after` when the marker was followed by
    // a blank line.
    let blank = tokenizer.tokenize_state.size == 1;
    tokenizer.tokenize_state.size = 0;

    if blank && tokenizer.interrupt {
        // An item that starts blank cannot interrupt existing content.
        State::Nok
    } else {
        // Measure the whole prefix, from the start of the `ListItem` event
        // to the current point, to know how far continuation lines must be
        // indented.
        let start = skip::to_back(
            &tokenizer.events,
            tokenizer.events.len() - 1,
            &[Name::ListItem],
        );
        let mut prefix = Slice::from_position(
            tokenizer.parse_state.bytes,
            &Position {
                start: &tokenizer.events[start].point,
                end: &tokenizer.point,
            },
        )
        .len();

        if blank {
            // Blank start: count one virtual space after the marker.
            prefix += 1;
        }

        // Record the continuation requirements on the current container.
        let container = &mut tokenizer.tokenize_state.document_container_stack
            [tokenizer.tokenize_state.document_continued];

        container.blank_initial = blank;
        container.size = prefix;

        tokenizer.exit(Name::ListItemPrefix);
        tokenizer.register_resolver_before(ResolveName::ListItem);
        State::Ok
    }
}
312
/// Start of list item continuation.
///
/// ```markdown
///   | * a
/// > |   b
///     ^
/// ```
pub fn cont_start(tokenizer: &mut Tokenizer) -> State {
    // Blank and non-blank lines continue the item under different rules.
    tokenizer.check(
        State::Next(StateName::ListItemContBlank),
        State::Next(StateName::ListItemContFilled),
    );
    State::Retry(StateName::BlankLineStart)
}
327
328/// Start of blank list item continuation.
329///
330/// ```markdown
331/// | * a
332/// > |
333/// ^
334/// | b
335/// ```
336pub fn cont_blank(tokenizer: &mut Tokenizer) -> State {
337 let container = &mut tokenizer.tokenize_state.document_container_stack
338 [tokenizer.tokenize_state.document_continued];
339 let size = container.size;
340
341 if container.blank_initial {
342 State::Nok
343 } else if matches!(tokenizer.current, Some(b'\t' | b' ')) {
344 // Consume, optionally, at most `size`.
345 State::Retry(space_or_tab_min_max(tokenizer, 0, size))
346 } else {
347 State::Ok
348 }
349}
350
351/// Start of non-blank list item continuation.
352///
353/// ```markdown
354/// | * a
355/// > | b
356/// ^
357/// ```
358pub fn cont_filled(tokenizer: &mut Tokenizer) -> State {
359 let container = &mut tokenizer.tokenize_state.document_container_stack
360 [tokenizer.tokenize_state.document_continued];
361 let size = container.size;
362
363 container.blank_initial = false;
364
365 if matches!(tokenizer.current, Some(b'\t' | b' ')) {
366 // Consume exactly `size`.
367 State::Retry(space_or_tab_min_max(tokenizer, size, size))
368 } else {
369 State::Nok
370 }
371}
372
/// Find adjacent list items with the same marker and wrap them in lists.
pub fn resolve(tokenizer: &mut Tokenizer) -> Option<Subresult> {
    // Tuples are `(marker byte, container balance, enter index, exit index)`.
    // `lists_wip` holds lists that may still be extended by a later item;
    // `lists` holds finished ones.
    let mut lists_wip: Vec<(u8, usize, usize, usize)> = vec![];
    let mut lists: Vec<(u8, usize, usize, usize)> = vec![];
    let mut index = 0;
    // Nesting depth: incremented on `ListItem` enter, decremented on exit.
    let mut balance = 0;

    // Merge list items.
    while index < tokenizer.events.len() {
        let event = &tokenizer.events[index];

        if event.name == Name::ListItem {
            if event.kind == Kind::Enter {
                let end = skip::opt(&tokenizer.events, index, &[Name::ListItem]) - 1;
                let marker = skip::to(&tokenizer.events, index, &[Name::ListItemMarker]);
                // Guaranteed to be a valid ASCII byte.
                let marker = tokenizer.parse_state.bytes[tokenizer.events[marker].point.index];
                let current = (marker, balance, index, end);

                let mut list_index = lists_wip.len();
                let mut matched = false;

                // Walk the work-in-progress lists from the most recent one
                // back, looking for a list this item can extend.
                while list_index > 0 {
                    list_index -= 1;
                    let previous = &lists_wip[list_index];
                    // First event after the candidate list, skipping
                    // inter-item whitespace, (blank) line endings, and
                    // block quote prefixes.
                    let before = skip::opt(
                        &tokenizer.events,
                        previous.3 + 1,
                        &[
                            Name::SpaceOrTab,
                            Name::LineEnding,
                            Name::BlankLineEnding,
                            Name::BlockQuotePrefix,
                        ],
                    );

                    // Same marker, same nesting depth, and directly
                    // adjacent: extend the existing list over this item.
                    if previous.0 == current.0 && previous.1 == current.1 && before == current.2 {
                        let previous_mut = &mut lists_wip[list_index];
                        previous_mut.3 = current.3;
                        // Anything more recent on the stack can no longer grow.
                        lists.append(&mut lists_wip.split_off(list_index + 1));
                        matched = true;
                        break;
                    }
                }

                if !matched {
                    let mut index = lists_wip.len();
                    let mut exit = None;

                    while index > 0 {
                        index -= 1;

                        // If the current (new) item starts after where this
                        // item on the stack ends, we can remove it from the
                        // stack.
                        if current.2 > lists_wip[index].3 {
                            exit = Some(index);
                        } else {
                            break;
                        }
                    }

                    if let Some(exit) = exit {
                        lists.append(&mut lists_wip.split_off(exit));
                    }

                    // This item starts a new (potential) list.
                    lists_wip.push(current);
                }

                balance += 1;
            } else {
                balance -= 1;
            }
        }

        index += 1;
    }

    // Whatever is still in progress at the end is also done.
    lists.append(&mut lists_wip);

    // Inject events.
    let mut index = 0;
    while index < lists.len() {
        let list_item = &lists[index];
        let mut list_start = tokenizer.events[list_item.2].clone();
        let mut list_end = tokenizer.events[list_item.3].clone();
        // Ordered markers are `.` or `)`; everything else is unordered.
        let name = match list_item.0 {
            b'.' | b')' => Name::ListOrdered,
            _ => Name::ListUnordered,
        };
        list_start.name = name.clone();
        list_end.name = name;

        tokenizer.map.add(list_item.2, 0, vec![list_start]);
        tokenizer.map.add(list_item.3 + 1, 0, vec![list_end]);

        index += 1;
    }

    tokenizer.map.consume(&mut tokenizer.events);
    None
}