Markdown parser fork with extended syntax for personal use.
1//! Byte order mark occurs at the start of the document.
2//!
3//! ## Grammar
4//!
5//! Byte order mark forms with the following BNF
6//! (<small>see [construct][crate::construct] for character groups</small>):
7//!
8//! ```bnf
9//! byte_order_mark ::= 0xEF 0xBB 0xBF
10//! ```
11//!
12//! ## Recommendation
13//!
14//! Don’t use BOMs.
15//!
16//! ## Tokens
17//!
18//! * [`ByteOrderMark`][Name::ByteOrderMark]
19//!
20//! ## References
21//!
22//! * [`micromark/lib/preprocess.js` in `micromark`](https://github.com/micromark/micromark/blob/ed23453/packages/micromark/dev/lib/preprocess.js#L54-L60)
23
24use crate::event::Name;
25use crate::state::{Name as StateName, State};
26use crate::tokenizer::Tokenizer;
27
/// The three bytes (`EF BB BF`) that make up a byte order mark, in the
/// order they must appear.
const BOM: [u8; 3] = *b"\xEF\xBB\xBF";
30
31/// Before BOM.
32///
33/// ```text
34/// > | 0xEF 0xBB 0xBF
35/// ^^^^
36/// ```
37pub fn start(tokenizer: &mut Tokenizer) -> State {
38 if tokenizer.current == Some(BOM[0]) {
39 tokenizer.enter(Name::ByteOrderMark);
40 State::Retry(StateName::BomInside)
41 } else {
42 State::Nok
43 }
44}
45
46/// In BOM.
47///
48/// ```text
49/// > | 0xEF 0xBB 0xBF
50/// ^^^^ ^^^^ ^^^^
51/// ```
52pub fn inside(tokenizer: &mut Tokenizer) -> State {
53 if tokenizer.current == Some(BOM[tokenizer.tokenize_state.size]) {
54 tokenizer.tokenize_state.size += 1;
55 tokenizer.consume();
56 if tokenizer.tokenize_state.size == BOM.len() {
57 tokenizer.exit(Name::ByteOrderMark);
58 tokenizer.tokenize_state.size = 0;
59 State::Ok
60 } else {
61 State::Next(StateName::BomInside)
62 }
63 } else {
64 tokenizer.tokenize_state.size = 0;
65 State::Nok
66 }
67}