Markdown parser fork with extended syntax for personal use.
at hack 208 lines 6.2 kB view raw
//! Space or tab (eol) occurs in [destination][], [label][], and [title][].
//!
//! ## Grammar
//!
//! Space or tab (eol) forms with the following BNF
//! (<small>see [construct][crate::construct] for character groups</small>):
//!
//! ```bnf
//! space_or_tab_eol ::= 1*space_or_tab | *space_or_tab eol *space_or_tab
//! ```
//!
//! Importantly, this allows one line ending, but not blank lines.
//!
//! ## References
//!
//! * [`micromark-factory-space/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-space/dev/index.js)
//!
//! [destination]: crate::construct::partial_destination
//! [label]: crate::construct::partial_label
//! [title]: crate::construct::partial_title

use crate::construct::partial_space_or_tab::{
    space_or_tab_with_options, Options as SpaceOrTabOptions,
};
use crate::event::{Content, Link, Name};
use crate::state::{Name as StateName, State};
use crate::subtokenize::link;
use crate::tokenizer::Tokenizer;

/// Configuration.
#[derive(Debug)]
pub struct Options {
    /// Connect this whitespace to the previous.
    pub connect: bool,
    /// Embedded content type to use.
    pub content: Option<Content>,
}

/// `space_or_tab_eol`, with default options (no embedded content type, not
/// connected to previous events).
///
/// Returns the state name to enter to run this construct.
pub fn space_or_tab_eol(tokenizer: &mut Tokenizer) -> StateName {
    space_or_tab_eol_with_options(
        tokenizer,
        Options {
            content: None,
            connect: false,
        },
    )
}

/// `space_or_tab_eol`, with the given options.
///
/// Stashes the options in the shared tokenize state so the state functions
/// below can read them, then returns the entry state name.
pub fn space_or_tab_eol_with_options(tokenizer: &mut Tokenizer, options: Options) -> StateName {
    tokenizer.tokenize_state.space_or_tab_eol_content = options.content;
    tokenizer.tokenize_state.space_or_tab_eol_connect = options.connect;
    StateName::SpaceOrTabEolStart
}

/// Start of whitespace with at most one eol.
58/// 59/// ```markdown 60/// > | a␠␠b 61/// ^ 62/// > | a␠␠␊ 63/// ^ 64/// | ␠␠b 65/// ``` 66pub fn start(tokenizer: &mut Tokenizer) -> State { 67 match tokenizer.current { 68 Some(b'\t' | b' ') => { 69 tokenizer.attempt( 70 State::Next(StateName::SpaceOrTabEolAfterFirst), 71 State::Next(StateName::SpaceOrTabEolAtEol), 72 ); 73 74 State::Retry(space_or_tab_with_options( 75 tokenizer, 76 SpaceOrTabOptions { 77 kind: Name::SpaceOrTab, 78 min: 1, 79 max: usize::MAX, 80 content: tokenizer.tokenize_state.space_or_tab_eol_content.clone(), 81 connect: tokenizer.tokenize_state.space_or_tab_eol_connect, 82 }, 83 )) 84 } 85 _ => State::Retry(StateName::SpaceOrTabEolAtEol), 86 } 87} 88 89/// After initial whitespace, at optional eol. 90/// 91/// ```markdown 92/// > | a␠␠b 93/// ^ 94/// > | a␠␠␊ 95/// ^ 96/// | ␠␠b 97/// ``` 98pub fn after_first(tokenizer: &mut Tokenizer) -> State { 99 tokenizer.tokenize_state.space_or_tab_eol_ok = true; 100 debug_assert!( 101 tokenizer.tokenize_state.space_or_tab_eol_content.is_none(), 102 "expected no content" 103 ); 104 // If the above ever errors, set `tokenizer.tokenize_state.space_or_tab_eol_connect: true` in that case. 105 State::Retry(StateName::SpaceOrTabEolAtEol) 106} 107 108/// After optional whitespace, at eol. 
109/// 110/// ```markdown 111/// > | a␠␠b 112/// ^ 113/// > | a␠␠␊ 114/// ^ 115/// | ␠␠b 116/// > | a␊ 117/// ^ 118/// | ␠␠b 119/// ``` 120pub fn at_eol(tokenizer: &mut Tokenizer) -> State { 121 if let Some(b'\n') = tokenizer.current { 122 if let Some(ref content) = tokenizer.tokenize_state.space_or_tab_eol_content { 123 tokenizer.enter_link( 124 Name::LineEnding, 125 Link { 126 previous: None, 127 next: None, 128 content: content.clone(), 129 }, 130 ); 131 } else { 132 tokenizer.enter(Name::LineEnding); 133 } 134 135 if tokenizer.tokenize_state.space_or_tab_eol_connect { 136 let index = tokenizer.events.len() - 1; 137 link(&mut tokenizer.events, index); 138 } else if tokenizer.tokenize_state.space_or_tab_eol_content.is_some() { 139 tokenizer.tokenize_state.space_or_tab_eol_connect = true; 140 } 141 142 tokenizer.consume(); 143 tokenizer.exit(Name::LineEnding); 144 State::Next(StateName::SpaceOrTabEolAfterEol) 145 } else { 146 let ok = tokenizer.tokenize_state.space_or_tab_eol_ok; 147 tokenizer.tokenize_state.space_or_tab_eol_content = None; 148 tokenizer.tokenize_state.space_or_tab_eol_connect = false; 149 tokenizer.tokenize_state.space_or_tab_eol_ok = false; 150 if ok { 151 State::Ok 152 } else { 153 State::Nok 154 } 155 } 156} 157 158/// After eol. 159/// 160/// ```markdown 161/// | a␠␠␊ 162/// > | ␠␠b 163/// ^ 164/// | a␊ 165/// > | ␠␠b 166/// ^ 167/// ``` 168pub fn after_eol(tokenizer: &mut Tokenizer) -> State { 169 if matches!(tokenizer.current, Some(b'\t' | b' ')) { 170 tokenizer.attempt(State::Next(StateName::SpaceOrTabEolAfterMore), State::Nok); 171 State::Retry(space_or_tab_with_options( 172 tokenizer, 173 SpaceOrTabOptions { 174 kind: Name::SpaceOrTab, 175 min: 1, 176 max: usize::MAX, 177 content: tokenizer.tokenize_state.space_or_tab_eol_content.clone(), 178 connect: tokenizer.tokenize_state.space_or_tab_eol_connect, 179 }, 180 )) 181 } else { 182 State::Retry(StateName::SpaceOrTabEolAfterMore) 183 } 184} 185 186/// After optional final whitespace. 
187/// 188/// ```markdown 189/// | a␠␠␊ 190/// > | ␠␠b 191/// ^ 192/// | a␊ 193/// > | ␠␠b 194/// ^ 195/// ``` 196pub fn after_more(tokenizer: &mut Tokenizer) -> State { 197 debug_assert!( 198 !matches!(tokenizer.current, None | Some(b'\n')), 199 "did not expect blank line" 200 ); 201 // If the above ever starts erroring, gracefully `State::Nok` on it. 202 // Currently it doesn’t happen, as we only use this in content, which does 203 // not allow blank lines. 204 tokenizer.tokenize_state.space_or_tab_eol_content = None; 205 tokenizer.tokenize_state.space_or_tab_eol_connect = false; 206 tokenizer.tokenize_state.space_or_tab_eol_ok = false; 207 State::Ok 208}