Markdown parser fork with extended syntax for personal use.
at hack 189 lines 5.5 kB view raw
//! Content occurs in the [flow][] content type.
//!
//! Content contains zero or more [definition][definition]s, followed by zero
//! or one [paragraph][].
//!
//! The constructs found in flow are:
//!
//! * [Definition][crate::construct::definition]
//! * [Paragraph][crate::construct::paragraph]
//!
//! ## Tokens
//!
//! * [`Content`][Name::Content]
//!
//! > 👉 **Note**: while parsing, [`Content`][Name::Content]
//! > is used, which is later compiled away.
//!
//! ## References
//!
//! * [`content.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/content.js)
//!
//! [flow]: crate::construct::flow
//! [definition]: crate::construct::definition
//! [paragraph]: crate::construct::paragraph

use crate::event::{Content, Kind, Link, Name};
use crate::message;
use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
use crate::subtokenize::{subtokenize, Subresult};
use crate::tokenizer::Tokenizer;
use alloc::vec;

/// Before a content chunk.
///
/// Opens a linked [`Content`][Name::Content] event; chunks on consecutive
/// lines are stitched together later by [`resolve`].
///
/// ```markdown
/// > | abc
///     ^
/// ```
pub fn chunk_start(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        // Flow only dispatches here when there is actual content on the line,
        // so EOL/EOF at the very start would be a parser bug.
        None | Some(b'\n') => unreachable!("unexpected eol/eof"),
        _ => {
            // `previous`/`next` are left empty here; `resolve` fills them in
            // when it merges adjacent chunks into one `Content` span.
            tokenizer.enter_link(
                Name::Content,
                Link {
                    previous: None,
                    next: None,
                    content: Content::Content,
                },
            );
            State::Retry(StateName::ContentChunkInside)
        }
    }
}

/// In a content chunk.
///
/// Consumes bytes until the end of the line (or input), then closes the
/// chunk and schedules [`resolve`] to run before other resolvers.
///
/// ```markdown
/// > | abc
///     ^^^
/// ```
pub fn chunk_inside(tokenizer: &mut Tokenizer) -> State {
    match tokenizer.current {
        None | Some(b'\n') => {
            tokenizer.exit(Name::Content);
            tokenizer.register_resolver_before(ResolveName::Content);
            // You’d be interrupting.
            tokenizer.interrupt = true;
            State::Ok
        }
        _ => {
            tokenizer.consume();
            State::Next(StateName::ContentChunkInside)
        }
    }
}

/// Before a definition.
///
/// Tries the definition construct first; if it fails, falls back to a
/// paragraph (definitions may only precede the paragraph, per the module
/// docs above).
///
/// ```markdown
/// > | [a]: b
///     ^
/// ```
pub fn definition_before(tokenizer: &mut Tokenizer) -> State {
    // On success continue after the definition; on failure re-parse the same
    // input as a paragraph.
    tokenizer.attempt(
        State::Next(StateName::ContentDefinitionAfter),
        State::Next(StateName::ParagraphStart),
    );
    State::Retry(StateName::DefinitionStart)
}

/// After a definition.
///
/// At EOF we are done; otherwise consume the line ending and look for
/// another definition (or the trailing paragraph) on the next line.
///
/// ```markdown
/// > | [a]: b
///           ^
///   | c
/// ```
pub fn definition_after(tokenizer: &mut Tokenizer) -> State {
    // A definition can only end at a line boundary or the end of input.
    debug_assert!(matches!(tokenizer.current, None | Some(b'\n')));
    if tokenizer.current.is_none() {
        State::Ok
    } else {
        tokenizer.enter(Name::LineEnding);
        tokenizer.consume();
        tokenizer.exit(Name::LineEnding);
        State::Next(StateName::ContentDefinitionBefore)
    }
}

/// Merge `Content` chunks, which currently span a single line, into actual
/// `Content`s that span multiple lines.
///
/// Walks the event list: for each `Enter:Content`, repeatedly checks whether
/// the events after its exit are a line ending, optional container prefix
/// (spaces/tabs, block quote markers), and another `Content` chunk. If so,
/// the first chunk's exit point is extended past the line ending, the line
/// ending events are dropped, and the two chunks are linked so that
/// [`subtokenize`] later tokenizes them as one continuous span.
pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, message::Message> {
    let mut index = 0;

    while index < tokenizer.events.len() {
        let event = &tokenizer.events[index];

        if event.kind == Kind::Enter && event.name == Name::Content {
            // Exit:Content
            let mut exit_index = index + 1;

            loop {
                // Candidate position of the next chunk's `Enter` event,
                // starting just past the expected line ending.
                let mut enter_index = exit_index + 1;

                if enter_index == tokenizer.events.len()
                    || tokenizer.events[enter_index].name != Name::LineEnding
                {
                    break;
                }

                // Skip past line ending.
                enter_index += 2;

                // Skip past prefix.
                while enter_index < tokenizer.events.len() {
                    let event = &tokenizer.events[enter_index];

                    if event.name != Name::SpaceOrTab
                        && event.name != Name::BlockQuotePrefix
                        && event.name != Name::BlockQuoteMarker
                    {
                        break;
                    }

                    enter_index += 1;
                }

                // Only merge when the next line actually starts with content.
                if enter_index == tokenizer.events.len()
                    || tokenizer.events[enter_index].name != Name::Content
                {
                    break;
                }

                // Set Exit:Content point to Exit:LineEnding.
                tokenizer.events[exit_index].point = tokenizer.events[exit_index + 2].point.clone();
                // Remove Enter:LineEnding, Exit:LineEnding.
                tokenizer.map.add(exit_index + 1, 2, vec![]);

                // Link Enter:Content to Enter:Content on this line and vice versa.
                tokenizer.events[exit_index - 1].link.as_mut().unwrap().next = Some(enter_index);
                tokenizer.events[enter_index]
                    .link
                    .as_mut()
                    .unwrap()
                    .previous = Some(exit_index - 1);

                // Potential next start.
                exit_index = enter_index + 1;
            }

            // Move to `Exit:Content`.
            index = exit_index;
        }

        index += 1;
    }

    // Apply the queued event removals in one pass.
    tokenizer.map.consume(&mut tokenizer.events);

    // Tokenize the merged content spans (definitions + trailing paragraph).
    let result = subtokenize(
        &mut tokenizer.events,
        tokenizer.parse_state,
        Some(&Content::Content),
    )?;

    Ok(Some(result))
}