Markdown parser fork with extended syntax for personal use.
at hack 67 lines 1.6 kB view raw
1//! Byte order mark occurs at the start of the document. 2//! 3//! ## Grammar 4//! 5//! Byte order mark forms with the following BNF 6//! (<small>see [construct][crate::construct] for character groups</small>): 7//! 8//! ```bnf 9//! byte_order_mark ::= 0xEF 0xBB 0xBF 10//! ``` 11//! 12//! ## Recommendation 13//! 14//! Don’t use BOMs. 15//! 16//! ## Tokens 17//! 18//! * [`ByteOrderMark`][Name::ByteOrderMark] 19//! 20//! ## References 21//! 22//! * [`micromark/lib/preprocess.js` in `micromark`](https://github.com/micromark/micromark/blob/ed23453/packages/micromark/dev/lib/preprocess.js#L54-L60) 23 24use crate::event::Name; 25use crate::state::{Name as StateName, State}; 26use crate::tokenizer::Tokenizer; 27 28/// Bytes of a BOM. 29const BOM: [u8; 3] = [0xEF, 0xBB, 0xBF]; 30 31/// Before BOM. 32/// 33/// ```text 34/// > | 0xEF 0xBB 0xBF 35/// ^^^^ 36/// ``` 37pub fn start(tokenizer: &mut Tokenizer) -> State { 38 if tokenizer.current == Some(BOM[0]) { 39 tokenizer.enter(Name::ByteOrderMark); 40 State::Retry(StateName::BomInside) 41 } else { 42 State::Nok 43 } 44} 45 46/// In BOM. 47/// 48/// ```text 49/// > | 0xEF 0xBB 0xBF 50/// ^^^^ ^^^^ ^^^^ 51/// ``` 52pub fn inside(tokenizer: &mut Tokenizer) -> State { 53 if tokenizer.current == Some(BOM[tokenizer.tokenize_state.size]) { 54 tokenizer.tokenize_state.size += 1; 55 tokenizer.consume(); 56 if tokenizer.tokenize_state.size == BOM.len() { 57 tokenizer.exit(Name::ByteOrderMark); 58 tokenizer.tokenize_state.size = 0; 59 State::Ok 60 } else { 61 State::Next(StateName::BomInside) 62 } 63 } else { 64 tokenizer.tokenize_state.size = 0; 65 State::Nok 66 } 67}