Markdown parser fork with extended syntax for personal use.
at hack 196 lines 5.9 kB view raw
1//! Title occurs in [definition][] and [label end][label_end]. 2//! 3//! ## Grammar 4//! 5//! Title forms with the following BNF 6//! (<small>see [construct][crate::construct] for character groups</small>): 7//! 8//! ```bnf 9//! ; Restriction: no blank lines. 10//! ; Restriction: markers must match (in case of `(` with `)`). 11//! title ::= marker *(title_byte | title_escape) marker 12//! title_byte ::= code - '\\' - marker 13//! title_escape ::= '\\' ['\\' | marker] 14//! marker ::= '"' | '\'' | '(' 15//! ``` 16//! 17//! Titles can be double quoted (`"a"`), single quoted (`'a'`), or 18//! parenthesized (`(a)`). 19//! 20//! Titles can contain line endings and whitespace, but they are not allowed to 21//! contain blank lines. 22//! They are allowed to be blank themselves. 23//! 24//! The title is interpreted as the [string][] content type. 25//! That means that [character escapes][character_escape] and 26//! [character references][character_reference] are allowed. 27//! 28//! ## References 29//! 30//! * [`micromark-factory-title/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-title/dev/index.js) 31//! 32//! [definition]: crate::construct::definition 33//! [string]: crate::construct::string 34//! [character_escape]: crate::construct::character_escape 35//! [character_reference]: crate::construct::character_reference 36//! [label_end]: crate::construct::label_end 37 38use crate::construct::partial_space_or_tab_eol::{space_or_tab_eol_with_options, Options}; 39use crate::event::{Content, Link, Name}; 40use crate::state::{Name as StateName, State}; 41use crate::subtokenize::link; 42use crate::tokenizer::Tokenizer; 43 44/// Start of title. 45/// 46/// ```markdown 47/// > | "a" 48/// ^ 49/// ``` 50pub fn start(tokenizer: &mut Tokenizer) -> State { 51 match tokenizer.current { 52 Some(b'"' | b'\'' | b'(') => { 53 let marker = tokenizer.current.unwrap(); 54 tokenizer.tokenize_state.marker = if marker == b'(' { b')' } else { marker }; 55 tokenizer.enter(tokenizer.tokenize_state.token_1.clone()); 56 tokenizer.enter(tokenizer.tokenize_state.token_2.clone()); 57 tokenizer.consume(); 58 tokenizer.exit(tokenizer.tokenize_state.token_2.clone()); 59 State::Next(StateName::TitleBegin) 60 } 61 _ => State::Nok, 62 } 63} 64 65/// After opening marker. 66/// 67/// This is also used at the closing marker. 68/// 69/// ```markdown 70/// > | "a" 71/// ^ 72/// ``` 73pub fn begin(tokenizer: &mut Tokenizer) -> State { 74 if tokenizer.current == Some(tokenizer.tokenize_state.marker) { 75 tokenizer.enter(tokenizer.tokenize_state.token_2.clone()); 76 tokenizer.consume(); 77 tokenizer.exit(tokenizer.tokenize_state.token_2.clone()); 78 tokenizer.exit(tokenizer.tokenize_state.token_1.clone()); 79 tokenizer.tokenize_state.marker = 0; 80 tokenizer.tokenize_state.connect = false; 81 State::Ok 82 } else { 83 tokenizer.enter(tokenizer.tokenize_state.token_3.clone()); 84 State::Retry(StateName::TitleAtBreak) 85 } 86} 87 88/// At something, before something else. 89/// 90/// ```markdown 91/// > | "a" 92/// ^ 93/// ``` 94pub fn at_break(tokenizer: &mut Tokenizer) -> State { 95 if let Some(byte) = tokenizer.current { 96 if byte == tokenizer.tokenize_state.marker { 97 tokenizer.exit(tokenizer.tokenize_state.token_3.clone()); 98 State::Retry(StateName::TitleBegin) 99 } else if byte == b'\n' { 100 tokenizer.attempt( 101 State::Next(StateName::TitleAfterEol), 102 State::Next(StateName::TitleNok), 103 ); 104 State::Retry(space_or_tab_eol_with_options( 105 tokenizer, 106 Options { 107 content: Some(Content::String), 108 connect: tokenizer.tokenize_state.connect, 109 }, 110 )) 111 } else { 112 tokenizer.enter_link( 113 Name::Data, 114 Link { 115 previous: None, 116 next: None, 117 content: Content::String, 118 }, 119 ); 120 121 if tokenizer.tokenize_state.connect { 122 let index = tokenizer.events.len() - 1; 123 link(&mut tokenizer.events, index); 124 } else { 125 tokenizer.tokenize_state.connect = true; 126 } 127 128 State::Retry(StateName::TitleInside) 129 } 130 } else { 131 State::Retry(StateName::TitleNok) 132 } 133} 134 135/// In title, after whitespace. 136/// 137/// ```markdown 138/// | "a␊ 139/// > | b" 140/// ^ 141/// ``` 142pub fn after_eol(tokenizer: &mut Tokenizer) -> State { 143 tokenizer.tokenize_state.connect = true; 144 State::Retry(StateName::TitleAtBreak) 145} 146 147/// In title, at something that isn’t allowed. 148/// 149/// ```markdown 150/// > | "a 151/// ^ 152/// ``` 153pub fn nok(tokenizer: &mut Tokenizer) -> State { 154 tokenizer.tokenize_state.marker = 0; 155 tokenizer.tokenize_state.connect = false; 156 State::Nok 157} 158 159/// In text. 160/// 161/// ```markdown 162/// > | "a" 163/// ^ 164/// ``` 165pub fn inside(tokenizer: &mut Tokenizer) -> State { 166 if tokenizer.current == Some(tokenizer.tokenize_state.marker) 167 || matches!(tokenizer.current, None | Some(b'\n')) 168 { 169 tokenizer.exit(Name::Data); 170 State::Retry(StateName::TitleAtBreak) 171 } else { 172 let name = if tokenizer.current == Some(b'\\') { 173 StateName::TitleEscape 174 } else { 175 StateName::TitleInside 176 }; 177 tokenizer.consume(); 178 State::Next(name) 179 } 180} 181 182/// After `\`, at a special character. 183/// 184/// ```markdown 185/// > | "a\*b" 186/// ^ 187/// ``` 188pub fn escape(tokenizer: &mut Tokenizer) -> State { 189 match tokenizer.current { 190 Some(b'"' | b'\'' | b')' | b'\\') => { 191 tokenizer.consume(); 192 State::Next(StateName::TitleInside) 193 } 194 _ => State::Retry(StateName::TitleInside), 195 } 196}