Markdown parser fork with extended syntax for personal use.
at main 275 lines 10 kB view raw
//! Deal with content in other content.
//!
//! To deal with content in content, *you* (a `markdown-rs` contributor) add
//! info on events.
//! Events are a flat list, but they can be connected to each other with a
//! [`Link`][crate::event::Link].
//! Links must occur on [`Enter`][Kind::Enter] events only, which are void
//! (they are followed by their corresponding [`Exit`][Kind::Exit] event).
//!
//! Links will then be passed through a tokenizer for the corresponding content
//! type by `subtokenize`.
//! The subevents they result in are split up into slots for each linked event
//! and replace those links.
//!
//! Subevents are not immediately subtokenized as markdown prevents us from
//! doing so due to definitions, which can occur after references, and thus the
//! whole document needs to be parsed up to the level of definitions, before
//! any level that can include references can be parsed.

use crate::event::{Content, Event, Kind, Name, VOID_EVENTS};
use crate::message;
use crate::parser::ParseState;
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
use crate::util::{edit_map::EditMap, skip};
use alloc::{string::String, vec, vec::Vec};

/// Outcome of a single [`subtokenize`] pass.
#[derive(Debug)]
pub struct Subresult {
    /// Whether the pass found nothing to tokenize.
    ///
    /// Starts out as `true` and is set to `false` as soon as the pass runs
    /// at least one chain of linked events through a subtokenizer.
    pub done: bool,
    /// Values collected from the `gfm_footnote_definitions` field of every
    /// subtokenizer result flushed during the pass.
    // NOTE(review): presumably identifiers of GFM footnote definitions —
    // confirm against what `Tokenizer::flush` returns.
    pub gfm_footnote_definitions: Vec<String>,
    /// Values collected from the `definitions` field of every subtokenizer
    /// result flushed during the pass.
    // NOTE(review): presumably identifiers of definitions — confirm against
    // what `Tokenizer::flush` returns.
    pub definitions: Vec<String>,
}

/// Link two [`Event`][]s.
///
/// Arbitrary (void) events can be linked together.
/// This optimizes for the common case where the event at `index` is connected
/// to the previous void event.
///
/// Shorthand for `link_to(events, index - 2, index)`: the previous void event
/// is expected to occupy the two slots right before `index` (its enter
/// directly followed by its exit).
///
/// # Panics
///
/// Panics when `index` is less than 2 (the subtraction overflows, or the
/// wrapped index is out of bounds), and in every case where [`link_to`]
/// panics.
pub fn link(events: &mut [Event], index: usize) {
    link_to(events, index - 2, index);
}

/// Link two arbitrary [`Event`][]s together.
45pub fn link_to(events: &mut [Event], previous: usize, next: usize) { 46 debug_assert_eq!(events[previous].kind, Kind::Enter); 47 debug_assert!( 48 VOID_EVENTS.iter().any(|d| d == &events[previous].name), 49 "expected event to be void" 50 ); 51 debug_assert_eq!(events[previous + 1].kind, Kind::Exit); 52 debug_assert_eq!(events[previous].name, events[previous + 1].name); 53 debug_assert_eq!(events[next].kind, Kind::Enter); 54 debug_assert!( 55 VOID_EVENTS.iter().any(|d| d == &events[next].name), 56 "expected event to be void" 57 ); 58 // Note: the exit of this event may not exist, so don’t check for that. 59 60 let link_previous = events[previous] 61 .link 62 .as_mut() 63 .expect("expected `link` on previous"); 64 link_previous.next = Some(next); 65 let link_next = events[next].link.as_mut().expect("expected `link` on next"); 66 link_next.previous = Some(previous); 67 68 debug_assert_eq!( 69 events[previous].link.as_ref().unwrap().content, 70 events[next].link.as_ref().unwrap().content, 71 "expected `content` to match" 72 ); 73} 74 75/// Parse linked events. 76/// 77/// Supposed to be called repeatedly, returns `true` when done. 78pub fn subtokenize( 79 events: &mut Vec<Event>, 80 parse_state: &ParseState, 81 filter: Option<&Content>, 82) -> Result<Subresult, message::Message> { 83 let mut map = EditMap::new(); 84 let mut index = 0; 85 let mut value = Subresult { 86 done: true, 87 gfm_footnote_definitions: vec![], 88 definitions: vec![], 89 }; 90 let mut acc = (0, 0); 91 92 while index < events.len() { 93 let event = &events[index]; 94 95 // Find each first opening chunk. 96 if let Some(ref link) = event.link { 97 debug_assert_eq!(event.kind, Kind::Enter); 98 99 // No need to enter linked events again. 100 if link.previous.is_none() 101 && (filter.is_none() || &link.content == *filter.as_ref().unwrap()) 102 { 103 // Index into `events` pointing to a chunk. 104 let mut link_index = Some(index); 105 // Subtokenizer. 
106 let mut tokenizer = Tokenizer::new(event.point.clone(), parse_state); 107 debug_assert!( 108 !matches!(link.content, Content::Flow), 109 "cannot use flow as subcontent yet" 110 ); 111 // Substate. 112 let mut state = State::Next(match link.content { 113 Content::Content => StateName::ContentDefinitionBefore, 114 Content::String => StateName::StringStart, 115 _ => StateName::TextStart, 116 }); 117 118 // Check if this is the first paragraph, after zero or more 119 // definitions (or a blank line), in a list item. 120 // Used for GFM task list items. 121 if tokenizer.parse_state.options.constructs.gfm_task_list_item 122 && index > 2 123 && events[index - 1].kind == Kind::Enter 124 && events[index - 1].name == Name::Paragraph 125 { 126 let before = skip::opt_back( 127 events, 128 index - 2, 129 &[ 130 Name::BlankLineEnding, 131 Name::Definition, 132 Name::LineEnding, 133 Name::SpaceOrTab, 134 ], 135 ); 136 137 if events[before].kind == Kind::Exit 138 && events[before].name == Name::ListItemPrefix 139 { 140 tokenizer 141 .tokenize_state 142 .document_at_first_paragraph_of_list_item = true; 143 } 144 } 145 146 // Loop through links to pass them in order to the subtokenizer. 
147 while let Some(index) = link_index { 148 let enter = &events[index]; 149 let link_curr = enter.link.as_ref().expect("expected link"); 150 debug_assert_eq!(enter.kind, Kind::Enter); 151 152 if link_curr.previous.is_some() { 153 tokenizer.define_skip(enter.point.clone()); 154 } 155 156 let end = &events[index + 1].point; 157 158 state = tokenizer.push( 159 (enter.point.index, enter.point.vs), 160 (end.index, end.vs), 161 state, 162 ); 163 164 link_index = link_curr.next; 165 } 166 167 let mut result = tokenizer.flush(state, true)?; 168 value 169 .gfm_footnote_definitions 170 .append(&mut result.gfm_footnote_definitions); 171 value.definitions.append(&mut result.definitions); 172 value.done = false; 173 174 acc = divide_events(&mut map, events, index, &mut tokenizer.events, acc); 175 } 176 } 177 178 index += 1; 179 } 180 181 map.consume(events); 182 183 Ok(value) 184} 185 186/// Divide `child_events` over links in `events`, the first of which is at 187/// `link_index`. 188pub fn divide_events( 189 map: &mut EditMap, 190 events: &[Event], 191 mut link_index: usize, 192 child_events: &mut Vec<Event>, 193 acc_before: (usize, usize), 194) -> (usize, usize) { 195 // Loop through `child_events` to figure out which parts belong where and 196 // fix deep links. 197 let mut child_index = 0; 198 let mut slices = vec![]; 199 let mut slice_start = 0; 200 let mut old_prev: Option<usize> = None; 201 let len = child_events.len(); 202 203 while child_index < len { 204 let current = &child_events[child_index].point; 205 let end = &events[link_index + 1].point; 206 207 // Find the first event that starts after the end we’re looking 208 // for. 209 if current.index > end.index || (current.index == end.index && current.vs > end.vs) { 210 slices.push((link_index, slice_start)); 211 slice_start = child_index; 212 link_index = events[link_index].link.as_ref().unwrap().next.unwrap(); 213 } 214 215 // Fix sublinks. 
216 if let Some(sublink_curr) = &child_events[child_index].link { 217 if sublink_curr.previous.is_some() { 218 let old_prev = old_prev.unwrap(); 219 let prev_event = &mut child_events[old_prev]; 220 // The `index` in `events` where the current link is, 221 // minus one to get the previous link, 222 // minus 2 events (the enter and exit) for each removed 223 // link. 224 let new_link = if slices.is_empty() { 225 old_prev + link_index + 2 226 } else { 227 old_prev + link_index - (slices.len() - 1) * 2 228 }; 229 prev_event.link.as_mut().unwrap().next = 230 Some(new_link + acc_before.1 - acc_before.0); 231 } 232 } 233 234 // If there is a `next` link in the subevents, we have to change 235 // its `previous` index to account for the shifted events. 236 // If it points to a next event, we also change the next event’s 237 // reference back to *this* event. 238 if let Some(sublink_curr) = &child_events[child_index].link { 239 if let Some(next) = sublink_curr.next { 240 let sublink_next = child_events[next].link.as_mut().unwrap(); 241 242 old_prev = sublink_next.previous; 243 244 sublink_next.previous = sublink_next 245 .previous 246 // The `index` in `events` where the current link is, 247 // minus 2 events (the enter and exit) for each removed 248 // link. 249 .map(|previous| { 250 previous + link_index - (slices.len() * 2) + acc_before.1 - acc_before.0 251 }); 252 } 253 } 254 255 child_index += 1; 256 } 257 258 if !child_events.is_empty() { 259 slices.push((link_index, slice_start)); 260 } 261 262 // Finally, inject the subevents. 263 let mut index = slices.len(); 264 265 while index > 0 { 266 index -= 1; 267 debug_assert!( 268 slices[index].0 < events.len(), 269 "expected slice start in bounds" 270 ); 271 map.add(slices[index].0, 2, child_events.split_off(slices[index].1)); 272 } 273 274 (acc_before.0 + (slices.len() * 2), acc_before.1 + len) 275}