src/construct/code_indented.rs at hack · crashkeys.dev/markdown-rs

crashkeys.dev / markdown-rs
fork atom
Markdown parser fork with extended syntax for personal use.
fork atom
markdown-rs / src / construct / code_indented.rs
at hack 192 lines 5.8 kB view raw
wrap content
Titus Wormer Refactor docs 11mo ago
e0ca3f6c
  1//! Code (indented) occurs in the [flow][] content type.
  2//!
  3//! ## Grammar
  4//!
  5//! Code (indented) forms with the following BNF
  6//! (<small>see [construct][crate::construct] for character groups</small>):
  7//!
  8//! ```bnf
  9//! code_indented ::= filled_line *( eol *( blank_line eol ) filled_line )
 10//!
 11//! ; Restriction: at least one `line` byte must be `text`.
 12//! filled_line ::= 4(space_or_tab) *line
 13//! blank_line ::= *space_or_tab
 14//! ```
 15//!
 16//! As this construct occurs in flow, like all flow constructs, it must be
 17//! followed by an eol (line ending) or eof (end of file).
 18//!
 19//! In markdown, it is also possible to use [code (text)][raw_text] in the
 20//! [text][] content type.
 21//! It is also possible to create code with the [code (fenced)][raw_flow]
 22//! construct.
 23//!
 24//! ## HTML
 25//!
 26//! Code (indented) relates to both the `<pre>` and the `<code>` elements in
 27//! HTML.
 28//! See [*§ 4.4.3 The `pre` element*][html_pre] and the [*§ 4.5.15 The `code`
 29//! element*][html_code] in the HTML spec for more info.
 30//!
 31//! ## Recommendation
 32//!
 33//! It is recommended to use code (fenced) instead of code (indented).
 34//! Code (fenced) is more explicit, similar to code (text), and has support
 35//! for specifying the programming language.
 36//!
 37//! ## Tokens
 38//!
 39//! * [`CodeIndented`][Name::CodeIndented]
 40//! * [`CodeFlowChunk`][Name::CodeFlowChunk]
 41//! * [`LineEnding`][Name::LineEnding]
 42//! * [`SpaceOrTab`][Name::SpaceOrTab]
 43//!
 44//! ## References
 45//!
 46//! * [`code-indented.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/code-indented.js)
 47//! * [*§ 4.4 Indented code blocks* in `CommonMark`](https://spec.commonmark.org/0.31/#indented-code-blocks)
 48//!
 49//! [flow]: crate::construct::flow
 50//! [text]: crate::construct::text
 51//! [raw_flow]: crate::construct::raw_flow
 52//! [raw_text]: crate::construct::raw_text
 53//! [html_code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
 54//! [html_pre]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-pre-element
 55
 56use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
 57use crate::event::Name;
 58use crate::state::{Name as StateName, State};
 59use crate::tokenizer::Tokenizer;
 60use crate::util::constant::TAB_SIZE;
 61
 62/// Start of code (indented).
 63///
 64/// > **Parsing note**: it is not needed to check if this first line is a
 65/// > filled line (that it has a non-whitespace character), because blank lines
 66/// > are parsed already, so we never run into that.
 67///
 68/// ```markdown
 69/// > |     aaa
 70///     ^
 71/// ```
 72pub fn start(tokenizer: &mut Tokenizer) -> State {
 73    // Do not interrupt paragraphs.
 74    if !tokenizer.interrupt
 75        && tokenizer.parse_state.options.constructs.code_indented
 76        && matches!(tokenizer.current, Some(b'\t' | b' '))
 77    {
 78        tokenizer.enter(Name::CodeIndented);
 79        tokenizer.attempt(State::Next(StateName::CodeIndentedAtBreak), State::Nok);
 80        State::Retry(space_or_tab_min_max(tokenizer, TAB_SIZE, TAB_SIZE))
 81    } else {
 82        State::Nok
 83    }
 84}
 85
 86/// At a break.
 87///
 88/// ```markdown
 89/// > |     aaa
 90///         ^  ^
 91/// ```
 92pub fn at_break(tokenizer: &mut Tokenizer) -> State {
 93    match tokenizer.current {
 94        None => State::Retry(StateName::CodeIndentedAfter),
 95        Some(b'\n') => {
 96            tokenizer.attempt(
 97                State::Next(StateName::CodeIndentedAtBreak),
 98                State::Next(StateName::CodeIndentedAfter),
 99            );
100            State::Retry(StateName::CodeIndentedFurtherStart)
101        }
102        _ => {
103            tokenizer.enter(Name::CodeFlowChunk);
104            State::Retry(StateName::CodeIndentedInside)
105        }
106    }
107}
108
109/// In code content.
110///
111/// ```markdown
112/// > |     aaa
113///         ^^^^
114/// ```
115pub fn inside(tokenizer: &mut Tokenizer) -> State {
116    match tokenizer.current {
117        None | Some(b'\n') => {
118            tokenizer.exit(Name::CodeFlowChunk);
119            State::Retry(StateName::CodeIndentedAtBreak)
120        }
121        _ => {
122            tokenizer.consume();
123            State::Next(StateName::CodeIndentedInside)
124        }
125    }
126}
127
128/// After indented code.
129///
130/// ```markdown
131/// > |     aaa
132///            ^
133/// ```
134pub fn after(tokenizer: &mut Tokenizer) -> State {
135    tokenizer.exit(Name::CodeIndented);
136    // Feel free to interrupt.
137    tokenizer.interrupt = false;
138    State::Ok
139}
140
141/// At eol, trying to parse another indent.
142///
143/// ```markdown
144/// > |     aaa
145///            ^
146///   |     bbb
147/// ```
148pub fn further_start(tokenizer: &mut Tokenizer) -> State {
149    if tokenizer.lazy || tokenizer.pierce {
150        return State::Nok;
151    }
152
153    if tokenizer.current == Some(b'\n') {
154        tokenizer.enter(Name::LineEnding);
155        tokenizer.consume();
156        tokenizer.exit(Name::LineEnding);
157        State::Next(StateName::CodeIndentedFurtherStart)
158    } else {
159        tokenizer.attempt(State::Ok, State::Next(StateName::CodeIndentedFurtherBegin));
160        State::Retry(space_or_tab_min_max(tokenizer, TAB_SIZE, TAB_SIZE))
161    }
162}
163
164/// At the beginning of a line that is not indented enough.
165///
166/// ```markdown
167///   |     aaa
168/// > |   bbb
169///     ^
170/// ```
171pub fn further_begin(tokenizer: &mut Tokenizer) -> State {
172    if matches!(tokenizer.current, Some(b'\t' | b' ')) {
173        tokenizer.attempt(State::Next(StateName::CodeIndentedFurtherAfter), State::Nok);
174        State::Retry(space_or_tab(tokenizer))
175    } else {
176        State::Nok
177    }
178}
179
180/// After whitespace, not indented enough.
181///
182/// ```markdown
183///   |     aaa
184/// > |   bbb
185///       ^
186/// ```
187pub fn further_after(tokenizer: &mut Tokenizer) -> State {
188    match tokenizer.current {
189        Some(b'\n') => State::Retry(StateName::CodeIndentedFurtherStart),
190        _ => State::Nok,
191    }
192}