Markdown parser fork with extended syntax for personal use.
1//! Deal with content in other content.
2//!
3//! To deal with content in content, *you* (a `markdown-rs` contributor) add
4//! info on events.
5//! Events are a flat list, but they can be connected to each other with a
6//! [`Link`][crate::event::Link].
7//! Links must occur on [`Enter`][Kind::Enter] events only, which are void
8//! (they are followed by their corresponding [`Exit`][Kind::Exit] event).
9//!
10//! Links will then be passed through a tokenizer for the corresponding content
11//! type by `subtokenize`.
12//! The subevents they result in are split up into slots for each linked event
13//! and replace those links.
14//!
15//! Subevents are not immediately subtokenized as markdown prevents us from
16//! doing so due to definitions, which can occur after references, and thus the
17//! whole document needs to be parsed up to the level of definitions, before
18//! any level that can include references can be parsed.
19
20use crate::event::{Content, Event, Kind, Name, VOID_EVENTS};
21use crate::message;
22use crate::parser::ParseState;
23use crate::state::{Name as StateName, State};
24use crate::tokenizer::Tokenizer;
25use crate::util::{edit_map::EditMap, skip};
26use alloc::{string::String, vec, vec::Vec};
27
/// Outcome of one [`subtokenize`][] pass.
#[derive(Debug)]
pub struct Subresult {
    /// Whether the pass finished without subtokenizing anything, meaning no
    /// further pass is needed.
    pub done: bool,
    /// GFM footnote definitions collected from the subtokenizers in this pass.
    pub gfm_footnote_definitions: Vec<String>,
    /// Definitions collected from the subtokenizers in this pass.
    pub definitions: Vec<String>,
}
34
35/// Link two [`Event`][]s.
36///
37/// Arbitrary (void) events can be linked together.
38/// This optimizes for the common case where the event at `index` is connected
39/// to the previous void event.
40pub fn link(events: &mut [Event], index: usize) {
41 link_to(events, index - 2, index);
42}
43
44/// Link two arbitrary [`Event`][]s together.
45pub fn link_to(events: &mut [Event], previous: usize, next: usize) {
46 debug_assert_eq!(events[previous].kind, Kind::Enter);
47 debug_assert!(
48 VOID_EVENTS.iter().any(|d| d == &events[previous].name),
49 "expected event to be void"
50 );
51 debug_assert_eq!(events[previous + 1].kind, Kind::Exit);
52 debug_assert_eq!(events[previous].name, events[previous + 1].name);
53 debug_assert_eq!(events[next].kind, Kind::Enter);
54 debug_assert!(
55 VOID_EVENTS.iter().any(|d| d == &events[next].name),
56 "expected event to be void"
57 );
58 // Note: the exit of this event may not exist, so don’t check for that.
59
60 let link_previous = events[previous]
61 .link
62 .as_mut()
63 .expect("expected `link` on previous");
64 link_previous.next = Some(next);
65 let link_next = events[next].link.as_mut().expect("expected `link` on next");
66 link_next.previous = Some(previous);
67
68 debug_assert_eq!(
69 events[previous].link.as_ref().unwrap().content,
70 events[next].link.as_ref().unwrap().content,
71 "expected `content` to match"
72 );
73}
74
75/// Parse linked events.
76///
77/// Supposed to be called repeatedly, returns `true` when done.
78pub fn subtokenize(
79 events: &mut Vec<Event>,
80 parse_state: &ParseState,
81 filter: Option<&Content>,
82) -> Result<Subresult, message::Message> {
83 let mut map = EditMap::new();
84 let mut index = 0;
85 let mut value = Subresult {
86 done: true,
87 gfm_footnote_definitions: vec![],
88 definitions: vec![],
89 };
90 let mut acc = (0, 0);
91
92 while index < events.len() {
93 let event = &events[index];
94
95 // Find each first opening chunk.
96 if let Some(ref link) = event.link {
97 debug_assert_eq!(event.kind, Kind::Enter);
98
99 // No need to enter linked events again.
100 if link.previous.is_none()
101 && (filter.is_none() || &link.content == *filter.as_ref().unwrap())
102 {
103 // Index into `events` pointing to a chunk.
104 let mut link_index = Some(index);
105 // Subtokenizer.
106 let mut tokenizer = Tokenizer::new(event.point.clone(), parse_state);
107 debug_assert!(
108 !matches!(link.content, Content::Flow),
109 "cannot use flow as subcontent yet"
110 );
111 // Substate.
112 let mut state = State::Next(match link.content {
113 Content::Content => StateName::ContentDefinitionBefore,
114 Content::String => StateName::StringStart,
115 _ => StateName::TextStart,
116 });
117
118 // Check if this is the first paragraph, after zero or more
119 // definitions (or a blank line), in a list item.
120 // Used for GFM task list items.
121 if tokenizer.parse_state.options.constructs.gfm_task_list_item
122 && index > 2
123 && events[index - 1].kind == Kind::Enter
124 && events[index - 1].name == Name::Paragraph
125 {
126 let before = skip::opt_back(
127 events,
128 index - 2,
129 &[
130 Name::BlankLineEnding,
131 Name::Definition,
132 Name::LineEnding,
133 Name::SpaceOrTab,
134 ],
135 );
136
137 if events[before].kind == Kind::Exit
138 && events[before].name == Name::ListItemPrefix
139 {
140 tokenizer
141 .tokenize_state
142 .document_at_first_paragraph_of_list_item = true;
143 }
144 }
145
146 // Loop through links to pass them in order to the subtokenizer.
147 while let Some(index) = link_index {
148 let enter = &events[index];
149 let link_curr = enter.link.as_ref().expect("expected link");
150 debug_assert_eq!(enter.kind, Kind::Enter);
151
152 if link_curr.previous.is_some() {
153 tokenizer.define_skip(enter.point.clone());
154 }
155
156 let end = &events[index + 1].point;
157
158 state = tokenizer.push(
159 (enter.point.index, enter.point.vs),
160 (end.index, end.vs),
161 state,
162 );
163
164 link_index = link_curr.next;
165 }
166
167 let mut result = tokenizer.flush(state, true)?;
168 value
169 .gfm_footnote_definitions
170 .append(&mut result.gfm_footnote_definitions);
171 value.definitions.append(&mut result.definitions);
172 value.done = false;
173
174 acc = divide_events(&mut map, events, index, &mut tokenizer.events, acc);
175 }
176 }
177
178 index += 1;
179 }
180
181 map.consume(events);
182
183 Ok(value)
184}
185
186/// Divide `child_events` over links in `events`, the first of which is at
187/// `link_index`.
188pub fn divide_events(
189 map: &mut EditMap,
190 events: &[Event],
191 mut link_index: usize,
192 child_events: &mut Vec<Event>,
193 acc_before: (usize, usize),
194) -> (usize, usize) {
195 // Loop through `child_events` to figure out which parts belong where and
196 // fix deep links.
197 let mut child_index = 0;
198 let mut slices = vec![];
199 let mut slice_start = 0;
200 let mut old_prev: Option<usize> = None;
201 let len = child_events.len();
202
203 while child_index < len {
204 let current = &child_events[child_index].point;
205 let end = &events[link_index + 1].point;
206
207 // Find the first event that starts after the end we’re looking
208 // for.
209 if current.index > end.index || (current.index == end.index && current.vs > end.vs) {
210 slices.push((link_index, slice_start));
211 slice_start = child_index;
212 link_index = events[link_index].link.as_ref().unwrap().next.unwrap();
213 }
214
215 // Fix sublinks.
216 if let Some(sublink_curr) = &child_events[child_index].link {
217 if sublink_curr.previous.is_some() {
218 let old_prev = old_prev.unwrap();
219 let prev_event = &mut child_events[old_prev];
220 // The `index` in `events` where the current link is,
221 // minus one to get the previous link,
222 // minus 2 events (the enter and exit) for each removed
223 // link.
224 let new_link = if slices.is_empty() {
225 old_prev + link_index + 2
226 } else {
227 old_prev + link_index - (slices.len() - 1) * 2
228 };
229 prev_event.link.as_mut().unwrap().next =
230 Some(new_link + acc_before.1 - acc_before.0);
231 }
232 }
233
234 // If there is a `next` link in the subevents, we have to change
235 // its `previous` index to account for the shifted events.
236 // If it points to a next event, we also change the next event’s
237 // reference back to *this* event.
238 if let Some(sublink_curr) = &child_events[child_index].link {
239 if let Some(next) = sublink_curr.next {
240 let sublink_next = child_events[next].link.as_mut().unwrap();
241
242 old_prev = sublink_next.previous;
243
244 sublink_next.previous = sublink_next
245 .previous
246 // The `index` in `events` where the current link is,
247 // minus 2 events (the enter and exit) for each removed
248 // link.
249 .map(|previous| {
250 previous + link_index - (slices.len() * 2) + acc_before.1 - acc_before.0
251 });
252 }
253 }
254
255 child_index += 1;
256 }
257
258 if !child_events.is_empty() {
259 slices.push((link_index, slice_start));
260 }
261
262 // Finally, inject the subevents.
263 let mut index = slices.len();
264
265 while index > 0 {
266 index -= 1;
267 debug_assert!(
268 slices[index].0 < events.len(),
269 "expected slice start in bounds"
270 );
271 map.add(slices[index].0, 2, child_events.split_off(slices[index].1));
272 }
273
274 (acc_before.0 + (slices.len() * 2), acc_before.1 + len)
275}