// A post-component library for building user interfaces on the web.
import { assert } from '../shared.ts'
2
// Lexer states. `lex` yields one of these next to every character it reads.
// Most correspond to a tokenizer state of the WHATWG HTML parsing spec (linked
// from the matching `case` in `lex`).
export const DATA = 0 // initial state; outside of any tag or comment
export const TAG_OPEN = 1 // just read "<"
export const END_TAG_OPEN = 2 // just read "</"
export const TAG_NAME = 3
export const BEFORE_ATTR_NAME = 4
export const ATTR_NAME = 5
export const AFTER_ATTR_NAME = 6
export const BEFORE_ATTR_VALUE = 7 // just read the "=" of an attribute
export const ATTR_VALUE_DOUBLE_QUOTED = 8
export const ATTR_VALUE_SINGLE_QUOTED = 9
export const ATTR_VALUE_UNQUOTED = 10
export const AFTER_ATTR_VALUE = 11 // just read the closing quote of a quoted value
export const SELF_CLOSING_START_TAG = 12 // just read "/" inside a tag
export const COMMENT2 = 13 // a comment2 is any type of comment that ends with ">" and not "-->"
export const EXCLAIM = 14 // just read "<!"
export const COMMENT = 15 // inside "<!--" ... "-->"

/** Union of every lexer state constant above. */
export type State =
	| typeof DATA
	| typeof TAG_OPEN
	| typeof END_TAG_OPEN
	| typeof TAG_NAME
	| typeof BEFORE_ATTR_NAME
	| typeof ATTR_NAME
	| typeof AFTER_ATTR_NAME
	| typeof BEFORE_ATTR_VALUE
	| typeof ATTR_VALUE_DOUBLE_QUOTED
	| typeof ATTR_VALUE_SINGLE_QUOTED
	| typeof ATTR_VALUE_UNQUOTED
	| typeof AFTER_ATTR_VALUE
	| typeof SELF_CLOSING_START_TAG
	| typeof COMMENT2
	| typeof EXCLAIM
	| typeof COMMENT
37
// Matches a single ASCII letter of either case. `lex` uses it after "</" to
// tell an end tag from a bogus comment. No `g` flag, so `.test()` is stateless.
const ALPHA = /[a-z]/i
39
/**
 * Tokenizes the static parts of a tagged template as HTML and labels every
 * character with a lexer state.
 *
 * The static strings are joined with `'\0'`, so each `'\0'` in the character
 * stream marks the boundary between two static parts (one interpolation). The
 * state reported for that `'\0'` tells the caller what syntactic position the
 * interpolation sits in.
 *
 * Each yielded pair is `[char, state]`. `state` is the state the lexer moved to
 * after reading `char`: `<` is reported as `TAG_OPEN`, a tag's closing `>` as
 * `DATA`. The exception is the comment delimiters: the `--` of `<!--` and all
 * of `-->` are reported as `COMMENT`.
 *
 * A static part must not itself contain `'\0'`; this is checked up front with
 * `assert`.
 *
 * @param statics the template strings array of a tagged template
 * @returns a generator yielding one `[char, state]` pair per input character
 */
export function* lex(statics: TemplateStringsArray): Generator<[char: string, state: State], void, void> {
	// '\0' is reserved as the separator between static parts.
	assert(!statics.some(s => s.includes('\0')))

	const input = statics.join('\0')

	let state: State = DATA
	let i = 0

	// `i--; continue` below means "reconsume": switch state and run the same
	// character through the new state without yielding it twice.
	while (i < input.length) {
		const c = input.charAt(i++)
		const ws = c === '\t' || c === '\n' || c === '\f' || c === ' '

		switch (state) {
			case DATA: // https://html.spec.whatwg.org/multipage/parsing.html#data-state
				if (c === '<') state = TAG_OPEN
				break

			case TAG_OPEN: // https://html.spec.whatwg.org/multipage/parsing.html#tag-open-state
				if (c === '!') state = EXCLAIM
				else if (c === '/') state = END_TAG_OPEN
				else if (c === '?') state = COMMENT2
				else {
					// NOTE(review): the spec only starts a tag name on an ASCII letter and
					// otherwise treats "<" as text. Here any other character (including a
					// '\0' hole) starts a tag name - confirm this is intentional.
					state = TAG_NAME
					i--
					continue
				}
				break

			case END_TAG_OPEN: // https://html.spec.whatwg.org/multipage/parsing.html#end-tag-open-state
				if (c === '>') state = DATA
				else {
					state = ALPHA.test(c) ? TAG_NAME : COMMENT2
					i--
					continue
				}
				break

			case TAG_NAME: // https://html.spec.whatwg.org/multipage/parsing.html#tag-name-state
				if (ws) state = BEFORE_ATTR_NAME
				else if (c === '/') state = SELF_CLOSING_START_TAG
				else if (c === '>') state = DATA
				break

			case BEFORE_ATTR_NAME: // https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-name-state
				if (ws) {
					// skip whitespace between attributes
				} else {
					state = c === '/' || c === '>' ? AFTER_ATTR_NAME : ATTR_NAME
					i--
					continue
				}
				break

			case ATTR_NAME: // https://html.spec.whatwg.org/multipage/parsing.html#attribute-name-state
				if (ws || c === '/' || c === '>') {
					state = AFTER_ATTR_NAME
					i--
					continue
				} else if (c === '=') state = BEFORE_ATTR_VALUE
				break

			case AFTER_ATTR_NAME: // https://html.spec.whatwg.org/multipage/parsing.html#after-attribute-name-state
				if (ws) {
					// skip whitespace after the attribute name
				} else if (c === '/') state = SELF_CLOSING_START_TAG
				else if (c === '=') state = BEFORE_ATTR_VALUE
				else if (c === '>') state = DATA
				else {
					state = ATTR_NAME
					i--
					continue
				}
				break

			case BEFORE_ATTR_VALUE: // https://html.spec.whatwg.org/multipage/parsing.html#before-attribute-value-state
				if (ws) {
					// skip whitespace after "="
				} else if (c === '"') state = ATTR_VALUE_DOUBLE_QUOTED
				else if (c === "'") state = ATTR_VALUE_SINGLE_QUOTED
				else if (c === '>') state = DATA
				else {
					state = ATTR_VALUE_UNQUOTED
					i--
					continue
				}
				break

			case ATTR_VALUE_DOUBLE_QUOTED: // https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-(double-quoted)-state
				if (c === '"') state = AFTER_ATTR_VALUE
				break

			case ATTR_VALUE_SINGLE_QUOTED: // https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-(single-quoted)-state
				if (c === "'") state = AFTER_ATTR_VALUE
				break

			case ATTR_VALUE_UNQUOTED: // https://html.spec.whatwg.org/multipage/parsing.html#attribute-value-(unquoted)-state
				if (ws) state = BEFORE_ATTR_NAME
				else if (c === '>') state = DATA
				break

			case AFTER_ATTR_VALUE: // https://html.spec.whatwg.org/multipage/parsing.html#after-attribute-value-(quoted)-state
				if (ws) state = BEFORE_ATTR_NAME
				else if (c === '/') state = SELF_CLOSING_START_TAG
				else if (c === '>') state = DATA
				else {
					state = BEFORE_ATTR_NAME
					i--
					continue
				}
				break

			case SELF_CLOSING_START_TAG: // https://html.spec.whatwg.org/multipage/parsing.html#self-closing-start-tag-state
				if (c === '>') state = DATA
				else {
					state = BEFORE_ATTR_NAME
					i--
					continue
				}
				break

			case COMMENT2: // https://html.spec.whatwg.org/multipage/parsing.html#bogus-comment-state
				if (c === '>') state = DATA
				break

			case EXCLAIM: // https://html.spec.whatwg.org/multipage/parsing.html#markup-declaration-open-state
				if (c === '-' && input[i] === '-') {
					// "<!--": report both dashes as COMMENT. The first is yielded here, the
					// second by the shared yield at the bottom of the loop.
					i++
					yield ['-', COMMENT]
					state = COMMENT
				} else {
					// Anything else is a bogus comment. Reconsume rather than swallow the
					// character, so that the ">" of "<!>" closes the comment.
					state = COMMENT2
					i--
					continue
				}
				break

			case COMMENT: // https://html.spec.whatwg.org/multipage/parsing.html#comment-state
				// NOTE(review): only "-->" ends a comment here. The spec also ends one on
				// "--!>" and on the abrupt forms "<!-->" and "<!--->".
				if (c === '-' && input[i] === '-' && input[i + 1] === '>') {
					yield ['-', COMMENT]
					i++
					yield ['-', COMMENT]
					i++
					yield ['>', COMMENT]
					state = DATA
					continue
				}
				break

			default:
				state satisfies never
		}

		yield [c, state]
	}
}