Markdown parser fork with extended syntax for personal use.
at hack 281 lines 8.9 kB view raw
1//! The text content type. 2//! 3//! **Text** contains phrasing content such as 4//! [attention][crate::construct::attention] (emphasis, gfm strikethrough, strong), 5//! [raw (text)][crate::construct::raw_text] (code (text), math (text)), and actual text. 6//! 7//! The constructs found in text are: 8//! 9//! * [Attention][crate::construct::attention] (emphasis, gfm strikethrough, strong) 10//! * [Autolink][crate::construct::autolink] 11//! * [Character escape][crate::construct::character_escape] 12//! * [Character reference][crate::construct::character_reference] 13//! * [Raw (text)][crate::construct::raw_text] (code (text), math (text)) 14//! * [GFM: Label start (footnote)][crate::construct::gfm_label_start_footnote] 15//! * [GFM: Task list item check][crate::construct::gfm_task_list_item_check] 16//! * [Hard break (escape)][crate::construct::hard_break_escape] 17//! * [HTML (text)][crate::construct::html_text] 18//! * [Label start (image)][crate::construct::label_start_image] 19//! * [Label start (link)][crate::construct::label_start_link] 20//! * [Label end][crate::construct::label_end] 21//! * [MDX: expression (text)][crate::construct::mdx_expression_text] 22//! * [MDX: JSX (text)][crate::construct::mdx_jsx_text] 23//! 24//! > 👉 **Note**: for performance reasons, hard break (trailing) is formed by 25//! > [whitespace][crate::construct::partial_whitespace]. 26 27use crate::construct::gfm_autolink_literal::resolve as resolve_gfm_autolink_literal; 28use crate::construct::partial_whitespace::resolve_whitespace; 29use crate::resolve::Name as ResolveName; 30use crate::state::{Name as StateName, State}; 31use crate::subtokenize::Subresult; 32use crate::tokenizer::Tokenizer; 33 34/// Characters that can start something in text. 35const MARKERS: [u8; 16] = [ 36 b'!', // `label_start_image` 37 b'$', // `raw_text` (math (text)) 38 b'&', // `character_reference` 39 b'*', // `attention` (emphasis, strong) 40 b'<', // `autolink`, `html_text`, `mdx_jsx_text` 41 b'H', // `gfm_autolink_literal` (`protocol` kind) 42 b'W', // `gfm_autolink_literal` (`www.` kind) 43 b'[', // `label_start_link` 44 b'\\', // `character_escape`, `hard_break_escape` 45 b']', // `label_end`, `gfm_label_start_footnote` 46 b'_', // `attention` (emphasis, strong) 47 b'`', // `raw_text` (code (text)) 48 b'h', // `gfm_autolink_literal` (`protocol` kind) 49 b'w', // `gfm_autolink_literal` (`www.` kind) 50 b'{', // `mdx_expression_text` 51 b'~', // `attention` (gfm strikethrough) 52]; 53 54/// Start of text. 55/// 56/// There is a slightly weird case where task list items have their check at 57/// the start of the first paragraph. 58/// So we start by checking for that. 59/// 60/// ```markdown 61/// > | abc 62/// ^ 63/// ``` 64pub fn start(tokenizer: &mut Tokenizer) -> State { 65 tokenizer.tokenize_state.markers = &MARKERS; 66 tokenizer.attempt( 67 State::Next(StateName::TextBefore), 68 State::Next(StateName::TextBefore), 69 ); 70 State::Retry(StateName::GfmTaskListItemCheckStart) 71} 72 73/// Before text. 74/// 75/// ```markdown 76/// > | abc 77/// ^ 78/// ``` 79pub fn before(tokenizer: &mut Tokenizer) -> State { 80 match tokenizer.current { 81 None => { 82 tokenizer.register_resolver(ResolveName::Data); 83 tokenizer.register_resolver(ResolveName::Text); 84 State::Ok 85 } 86 Some(b'!') => { 87 tokenizer.attempt( 88 State::Next(StateName::TextBefore), 89 State::Next(StateName::TextBeforeData), 90 ); 91 State::Retry(StateName::LabelStartImageStart) 92 } 93 // raw (text) (code (text), math (text)) 94 Some(b'$' | b'`') => { 95 tokenizer.attempt( 96 State::Next(StateName::TextBefore), 97 State::Next(StateName::TextBeforeData), 98 ); 99 State::Retry(StateName::RawTextStart) 100 } 101 Some(b'&') => { 102 tokenizer.attempt( 103 State::Next(StateName::TextBefore), 104 State::Next(StateName::TextBeforeData), 105 ); 106 State::Retry(StateName::CharacterReferenceStart) 107 } 108 // attention (emphasis, gfm strikethrough, strong) 109 Some(b'*' | b'_' | b'~') => { 110 tokenizer.attempt( 111 State::Next(StateName::TextBefore), 112 State::Next(StateName::TextBeforeData), 113 ); 114 State::Retry(StateName::AttentionStart) 115 } 116 // `autolink`, `html_text` (order does not matter), `mdx_jsx_text` (order matters). 117 Some(b'<') => { 118 tokenizer.attempt( 119 State::Next(StateName::TextBefore), 120 State::Next(StateName::TextBeforeHtml), 121 ); 122 State::Retry(StateName::AutolinkStart) 123 } 124 Some(b'H' | b'h') => { 125 tokenizer.attempt( 126 State::Next(StateName::TextBefore), 127 State::Next(StateName::TextBeforeData), 128 ); 129 State::Retry(StateName::GfmAutolinkLiteralProtocolStart) 130 } 131 Some(b'W' | b'w') => { 132 tokenizer.attempt( 133 State::Next(StateName::TextBefore), 134 State::Next(StateName::TextBeforeData), 135 ); 136 State::Retry(StateName::GfmAutolinkLiteralWwwStart) 137 } 138 Some(b'[') => { 139 tokenizer.attempt( 140 State::Next(StateName::TextBefore), 141 State::Next(StateName::TextBeforeWikilinkStart), 142 ); 143 State::Retry(StateName::GfmLabelStartFootnoteStart) 144 } 145 Some(b'\\') => { 146 tokenizer.attempt( 147 State::Next(StateName::TextBefore), 148 State::Next(StateName::TextBeforeHardBreakEscape), 149 ); 150 State::Retry(StateName::CharacterEscapeStart) 151 } 152 Some(b']') => { 153 tokenizer.attempt( 154 State::Next(StateName::TextBefore), 155 State::Next(StateName::TextBeforeData), 156 ); 157 State::Retry(StateName::LabelEndStart) 158 } 159 Some(b'{') => { 160 tokenizer.attempt( 161 State::Next(StateName::TextBefore), 162 State::Next(StateName::TextBeforeData), 163 ); 164 State::Retry(StateName::MdxExpressionTextStart) 165 } 166 _ => State::Retry(StateName::TextBeforeData), 167 } 168} 169 170/// Before html (text). 171/// 172/// At `<`, which wasn’t an autolink. 173/// 174/// ```markdown 175/// > | a <b> 176/// ^ 177/// ``` 178pub fn before_html(tokenizer: &mut Tokenizer) -> State { 179 tokenizer.attempt( 180 State::Next(StateName::TextBefore), 181 State::Next(StateName::TextBeforeMdxJsx), 182 ); 183 State::Retry(StateName::HtmlTextStart) 184} 185 186/// Before mdx jsx (text). 187/// 188/// At `<`, which wasn’t an autolink or html. 189/// 190/// ```markdown 191/// > | a <b> 192/// ^ 193/// ``` 194pub fn before_mdx_jsx(tokenizer: &mut Tokenizer) -> State { 195 tokenizer.attempt( 196 State::Next(StateName::TextBefore), 197 State::Next(StateName::TextBeforeData), 198 ); 199 State::Retry(StateName::MdxJsxTextStart) 200} 201 202/// Before hard break escape. 203/// 204/// At `\`, which wasn’t a character escape. 205/// 206/// ```markdown 207/// > | a \␊ 208/// ^ 209/// ``` 210pub fn before_hard_break_escape(tokenizer: &mut Tokenizer) -> State { 211 tokenizer.attempt( 212 State::Next(StateName::TextBefore), 213 State::Next(StateName::TextBeforeData), 214 ); 215 State::Retry(StateName::HardBreakEscapeStart) 216} 217 218/// Before wikilink start. 219/// 220/// At `[`, which wasn’t a GFM label start (footnote). 221/// 222/// ```markdown 223/// > | [[a]] 224/// ^ 225/// ``` 226pub fn before_label_wikilink_start(tokenizer: &mut Tokenizer) -> State { 227 tokenizer.attempt( 228 State::Next(StateName::TextBefore), 229 State::Next(StateName::TextBeforeData), 230 ); 231 State::Retry(StateName::WikilinkStart) 232} 233 234/// Before label start (link). 235/// 236/// 237/// At `[`, which wasn’t a GFM label start (footnote). 238/// 239/// ```markdown 240/// > | [a](b) 241/// ^ 242/// ``` 243pub fn before_label_start_link(tokenizer: &mut Tokenizer) -> State { 244 tokenizer.attempt( 245 State::Next(StateName::TextBefore), 246 State::Next(StateName::TextBeforeData), 247 ); 248 State::Retry(StateName::LabelStartLinkStart) 249} 250 251/// Before data. 252/// 253/// ```markdown 254/// > | a 255/// ^ 256/// ``` 257pub fn before_data(tokenizer: &mut Tokenizer) -> State { 258 tokenizer.attempt(State::Next(StateName::TextBefore), State::Nok); 259 State::Retry(StateName::DataStart) 260} 261 262/// Resolve whitespace. 263pub fn resolve(tokenizer: &mut Tokenizer) -> Option<Subresult> { 264 resolve_whitespace( 265 tokenizer, 266 tokenizer.parse_state.options.constructs.hard_break_trailing, 267 true, 268 ); 269 270 if tokenizer 271 .parse_state 272 .options 273 .constructs 274 .gfm_autolink_literal 275 { 276 resolve_gfm_autolink_literal(tokenizer); 277 } 278 279 tokenizer.map.consume(&mut tokenizer.events); 280 None 281}