// Markdown parser fork with extended syntax for personal use.
1//! Title occurs in [definition][] and [label end][label_end].
2//!
3//! ## Grammar
4//!
5//! Title forms with the following BNF
6//! (<small>see [construct][crate::construct] for character groups</small>):
7//!
8//! ```bnf
9//! ; Restriction: no blank lines.
10//! ; Restriction: markers must match (in case of `(` with `)`).
11//! title ::= marker *(title_byte | title_escape) marker
12//! title_byte ::= code - '\\' - marker
13//! title_escape ::= '\\' ['\\' | marker]
14//! marker ::= '"' | '\'' | '('
15//! ```
16//!
17//! Titles can be double quoted (`"a"`), single quoted (`'a'`), or
18//! parenthesized (`(a)`).
19//!
20//! Titles can contain line endings and whitespace, but they are not allowed to
21//! contain blank lines.
22//! They are allowed to be blank themselves.
23//!
24//! The title is interpreted as the [string][] content type.
25//! That means that [character escapes][character_escape] and
26//! [character references][character_reference] are allowed.
27//!
28//! ## References
29//!
30//! * [`micromark-factory-title/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-title/dev/index.js)
31//!
32//! [definition]: crate::construct::definition
33//! [string]: crate::construct::string
34//! [character_escape]: crate::construct::character_escape
35//! [character_reference]: crate::construct::character_reference
36//! [label_end]: crate::construct::label_end
37
38use crate::construct::partial_space_or_tab_eol::{space_or_tab_eol_with_options, Options};
39use crate::event::{Content, Link, Name};
40use crate::state::{Name as StateName, State};
41use crate::subtokenize::link;
42use crate::tokenizer::Tokenizer;
43
44/// Start of title.
45///
46/// ```markdown
47/// > | "a"
48/// ^
49/// ```
50pub fn start(tokenizer: &mut Tokenizer) -> State {
51 match tokenizer.current {
52 Some(b'"' | b'\'' | b'(') => {
53 let marker = tokenizer.current.unwrap();
54 tokenizer.tokenize_state.marker = if marker == b'(' { b')' } else { marker };
55 tokenizer.enter(tokenizer.tokenize_state.token_1.clone());
56 tokenizer.enter(tokenizer.tokenize_state.token_2.clone());
57 tokenizer.consume();
58 tokenizer.exit(tokenizer.tokenize_state.token_2.clone());
59 State::Next(StateName::TitleBegin)
60 }
61 _ => State::Nok,
62 }
63}
64
65/// After opening marker.
66///
67/// This is also used at the closing marker.
68///
69/// ```markdown
70/// > | "a"
71/// ^
72/// ```
73pub fn begin(tokenizer: &mut Tokenizer) -> State {
74 if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
75 tokenizer.enter(tokenizer.tokenize_state.token_2.clone());
76 tokenizer.consume();
77 tokenizer.exit(tokenizer.tokenize_state.token_2.clone());
78 tokenizer.exit(tokenizer.tokenize_state.token_1.clone());
79 tokenizer.tokenize_state.marker = 0;
80 tokenizer.tokenize_state.connect = false;
81 State::Ok
82 } else {
83 tokenizer.enter(tokenizer.tokenize_state.token_3.clone());
84 State::Retry(StateName::TitleAtBreak)
85 }
86}
87
88/// At something, before something else.
89///
90/// ```markdown
91/// > | "a"
92/// ^
93/// ```
94pub fn at_break(tokenizer: &mut Tokenizer) -> State {
95 if let Some(byte) = tokenizer.current {
96 if byte == tokenizer.tokenize_state.marker {
97 tokenizer.exit(tokenizer.tokenize_state.token_3.clone());
98 State::Retry(StateName::TitleBegin)
99 } else if byte == b'\n' {
100 tokenizer.attempt(
101 State::Next(StateName::TitleAfterEol),
102 State::Next(StateName::TitleNok),
103 );
104 State::Retry(space_or_tab_eol_with_options(
105 tokenizer,
106 Options {
107 content: Some(Content::String),
108 connect: tokenizer.tokenize_state.connect,
109 },
110 ))
111 } else {
112 tokenizer.enter_link(
113 Name::Data,
114 Link {
115 previous: None,
116 next: None,
117 content: Content::String,
118 },
119 );
120
121 if tokenizer.tokenize_state.connect {
122 let index = tokenizer.events.len() - 1;
123 link(&mut tokenizer.events, index);
124 } else {
125 tokenizer.tokenize_state.connect = true;
126 }
127
128 State::Retry(StateName::TitleInside)
129 }
130 } else {
131 State::Retry(StateName::TitleNok)
132 }
133}
134
/// In title, after whitespace.
///
/// ```markdown
///   | "a␊
/// > | b"
///     ^
/// ```
pub fn after_eol(tokenizer: &mut Tokenizer) -> State {
    // A line ending was consumed, so subsequent string content must be
    // linked to what came before it.
    tokenizer.tokenize_state.connect = true;
    State::Retry(StateName::TitleAtBreak)
}
146
/// In title, at something that isn’t allowed.
///
/// ```markdown
/// > | "a
///       ^
/// ```
pub fn nok(tokenizer: &mut Tokenizer) -> State {
    // Reset the shared tokenize state so the next construct starts clean.
    tokenizer.tokenize_state.marker = 0;
    tokenizer.tokenize_state.connect = false;
    State::Nok
}
158
159/// In text.
160///
161/// ```markdown
162/// > | "a"
163/// ^
164/// ```
165pub fn inside(tokenizer: &mut Tokenizer) -> State {
166 if tokenizer.current == Some(tokenizer.tokenize_state.marker)
167 || matches!(tokenizer.current, None | Some(b'\n'))
168 {
169 tokenizer.exit(Name::Data);
170 State::Retry(StateName::TitleAtBreak)
171 } else {
172 let name = if tokenizer.current == Some(b'\\') {
173 StateName::TitleEscape
174 } else {
175 StateName::TitleInside
176 };
177 tokenizer.consume();
178 State::Next(name)
179 }
180}
181
182/// After `\`, at a special character.
183///
184/// ```markdown
185/// > | "a\*b"
186/// ^
187/// ```
188pub fn escape(tokenizer: &mut Tokenizer) -> State {
189 match tokenizer.current {
190 Some(b'"' | b'\'' | b')' | b'\\') => {
191 tokenizer.consume();
192 State::Next(StateName::TitleInside)
193 }
194 _ => State::Retry(StateName::TitleInside),
195 }
196}