// Markdown parser fork with extended syntax for personal use.
//! Label occurs in [definition][] and [label end][label_end].
//!
//! ## Grammar
//!
//! Label forms with the following BNF
//! (<small>see [construct][crate::construct] for character groups</small>):
//!
//! ```bnf
//! ; Restriction: maximum `999` codes allowed between brackets.
//! ; Restriction: no blank lines.
//! ; Restriction: at least 1 `text` byte must exist.
//! label ::= '[' *(label_byte | label_escape) ']'
//! label_byte ::= code - '[' - '\\' - ']'
//! label_escape ::= '\\' ['[' | '\\' | ']']
//! ```
//!
//! The maximum allowed size of the label, without the brackets, is `999`
//! (inclusive), which is defined in
//! [`LINK_REFERENCE_SIZE_MAX`][].
//!
//! Labels can contain line endings and whitespace, but they are not allowed to
//! contain blank lines, and they must not be blank themselves.
//!
//! The label is interpreted as the [string][] content type.
//! That means that [character escapes][character_escape] and
//! [character references][character_reference] are allowed.
//!
//! > 👉 **Note**: this label relates to, but is not, the initial “label” of
//! > what is known as a reference in markdown:
//! >
//! > | Kind      | Link     | Image     |
//! > | --------- | -------- | --------- |
//! > | Shortcut  | `[x]`    | `![x]`    |
//! > | Collapsed | `[x][]`  | `![x][]`  |
//! > | Full      | `[x][y]` | `![x][y]` |
//! >
//! > The 6 above things are references, in the three kinds they come in, as
//! > links and images.
//! > The label that this module focusses on is only the thing that contains
//! > `y`.
//! >
//! > The thing that contains `x` is not a single thing when parsing markdown,
//! > but instead consists of an opening
//! > ([label start (image)][label_start_image] or
//! > [label start (link)][label_start_link]) and a closing
//! > ([label end][label_end]), so as to allow further phrasing such as
//! > [code (text)][raw_text] or [attention][].
//!
//! ## References
//!
51//! * [`micromark-factory-label/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-label/dev/index.js) 52//! 53//! [definition]: crate::construct::definition 54//! [string]: crate::construct::string 55//! [attention]: crate::construct::attention 56//! [character_escape]: crate::construct::character_escape 57//! [character_reference]: crate::construct::character_reference 58//! [label_start_image]: crate::construct::label_start_image 59//! [label_start_link]: crate::construct::label_start_link 60//! [label_end]: crate::construct::label_end 61//! [raw_text]: crate::construct::raw_text 62//! [link_reference_size_max]: crate::util::constant::LINK_REFERENCE_SIZE_MAX 63 64use crate::construct::partial_space_or_tab_eol::{space_or_tab_eol_with_options, Options}; 65use crate::event::{Content, Link, Name}; 66use crate::state::{Name as StateName, State}; 67use crate::subtokenize::link; 68use crate::tokenizer::Tokenizer; 69use crate::util::constant::LINK_REFERENCE_SIZE_MAX; 70 71/// Start of label. 72/// 73/// ```markdown 74/// > | [a] 75/// ^ 76/// ``` 77pub fn start(tokenizer: &mut Tokenizer) -> State { 78 debug_assert_eq!(tokenizer.current, Some(b'['), "expected `[`"); 79 tokenizer.enter(tokenizer.tokenize_state.token_1.clone()); 80 tokenizer.enter(tokenizer.tokenize_state.token_2.clone()); 81 tokenizer.consume(); 82 tokenizer.exit(tokenizer.tokenize_state.token_2.clone()); 83 tokenizer.enter(tokenizer.tokenize_state.token_3.clone()); 84 State::Next(StateName::LabelAtBreak) 85} 86 87/// In label, at something, before something else. 
88/// 89/// ```markdown 90/// > | [a] 91/// ^ 92/// ``` 93pub fn at_break(tokenizer: &mut Tokenizer) -> State { 94 if tokenizer.tokenize_state.size > LINK_REFERENCE_SIZE_MAX 95 || matches!(tokenizer.current, None | Some(b'[')) 96 || (matches!(tokenizer.current, Some(b']')) && !tokenizer.tokenize_state.seen) 97 { 98 State::Retry(StateName::LabelNok) 99 } else { 100 match tokenizer.current { 101 Some(b'\n') => { 102 tokenizer.attempt( 103 State::Next(StateName::LabelEolAfter), 104 State::Next(StateName::LabelNok), 105 ); 106 State::Retry(space_or_tab_eol_with_options( 107 tokenizer, 108 Options { 109 content: Some(Content::String), 110 connect: tokenizer.tokenize_state.connect, 111 }, 112 )) 113 } 114 Some(b']') => { 115 tokenizer.exit(tokenizer.tokenize_state.token_3.clone()); 116 tokenizer.enter(tokenizer.tokenize_state.token_2.clone()); 117 tokenizer.consume(); 118 tokenizer.exit(tokenizer.tokenize_state.token_2.clone()); 119 tokenizer.exit(tokenizer.tokenize_state.token_1.clone()); 120 tokenizer.tokenize_state.connect = false; 121 tokenizer.tokenize_state.seen = false; 122 tokenizer.tokenize_state.size = 0; 123 State::Ok 124 } 125 _ => { 126 tokenizer.enter_link( 127 Name::Data, 128 Link { 129 previous: None, 130 next: None, 131 content: Content::String, 132 }, 133 ); 134 135 if tokenizer.tokenize_state.connect { 136 let index = tokenizer.events.len() - 1; 137 link(&mut tokenizer.events, index); 138 } else { 139 tokenizer.tokenize_state.connect = true; 140 } 141 142 State::Retry(StateName::LabelInside) 143 } 144 } 145 } 146} 147 148/// In label, after whitespace. 149/// 150/// ```markdown 151/// | [a␊ 152/// > | b] 153/// ^ 154/// ``` 155pub fn eol_after(tokenizer: &mut Tokenizer) -> State { 156 tokenizer.tokenize_state.connect = true; 157 State::Retry(StateName::LabelAtBreak) 158} 159 160/// In label, on something disallowed. 
161/// 162/// ```markdown 163/// > | [] 164/// ^ 165/// ``` 166pub fn nok(tokenizer: &mut Tokenizer) -> State { 167 tokenizer.tokenize_state.connect = false; 168 tokenizer.tokenize_state.seen = false; 169 tokenizer.tokenize_state.size = 0; 170 State::Nok 171} 172 173/// In label, in text. 174/// 175/// ```markdown 176/// > | [a] 177/// ^ 178/// ``` 179pub fn inside(tokenizer: &mut Tokenizer) -> State { 180 match tokenizer.current { 181 None | Some(b'\n' | b'[' | b']') => { 182 tokenizer.exit(Name::Data); 183 State::Retry(StateName::LabelAtBreak) 184 } 185 Some(byte) => { 186 if tokenizer.tokenize_state.size > LINK_REFERENCE_SIZE_MAX { 187 tokenizer.exit(Name::Data); 188 State::Retry(StateName::LabelAtBreak) 189 } else { 190 tokenizer.consume(); 191 tokenizer.tokenize_state.size += 1; 192 if !tokenizer.tokenize_state.seen && !matches!(byte, b'\t' | b' ') { 193 tokenizer.tokenize_state.seen = true; 194 } 195 State::Next(if matches!(byte, b'\\') { 196 StateName::LabelEscape 197 } else { 198 StateName::LabelInside 199 }) 200 } 201 } 202 } 203} 204 205/// After `\`, at a special character. 206/// 207/// ```markdown 208/// > | [a\*a] 209/// ^ 210/// ``` 211pub fn escape(tokenizer: &mut Tokenizer) -> State { 212 match tokenizer.current { 213 Some(b'[' | b'\\' | b']') => { 214 tokenizer.consume(); 215 tokenizer.tokenize_state.size += 1; 216 State::Next(StateName::LabelInside) 217 } 218 _ => State::Retry(StateName::LabelInside), 219 } 220}