// Package gott is a Go template renderer based on Perl's Template Toolkit.
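//
// A minimal usage sketch (Token and the TokenType constants are defined
// elsewhere in this package):
//
//	for _, tok := range NewLexer("Hello, [% name %]!").Tokens() {
//		fmt.Println(tok.Type, tok.Value)
//	}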
package gott

import (
	"fmt"
	"strings"
	"unicode"
	"unicode/utf8"
)

const eof = -1

// Lexer tokenizes a template string into a sequence of tokens.
type Lexer struct {
	input        string     // the string being scanned
	start        int        // start position of current token
	pos          int        // current position in input
	width        int        // width of last rune read
	line         int        // current line number (1-based)
	linePos      int        // offset of the start of the current line
	startLine    int        // line number at the start of the current token
	startLinePos int        // line start offset at the start of the current token
	tokens       chan Token // channel of scanned tokens
}

// stateFn represents a lexer state function. Each state consumes input,
// emits any tokens it completes, and returns the next state; a nil
// return tells run to stop.
type stateFn func(*Lexer) stateFn

// NewLexer creates a new lexer for the given input and starts scanning.
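// Scanning runs in its own goroutine and tokens are delivered through a
// buffered channel, so the lexer can run slightly ahead of its consumer
// and blocks once the two-token buffer is full.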
func NewLexer(input string) *Lexer {
	l := &Lexer{
		input:     input,
		line:      1,
		startLine: 1,
		tokens:    make(chan Token, 2),
	}
	go l.run()
	return l
}

// run executes the state machine until a state function returns nil.
func (l *Lexer) run() {
	for state := lexText; state != nil; {
		state = state(l)
	}
	close(l.tokens)
}

// NextToken returns the next token from the lexer, blocking until one
// is available.
func (l *Lexer) NextToken() Token {
	return <-l.tokens
}

// Tokens returns all tokens as a slice by draining the channel; it
// blocks until scanning completes.
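// For example, NewLexer("Hi [% 1 + 2 %]").Tokens() yields
//
//	TokenText("Hi "), TokenTagOpen, TokenNumber("1"), TokenPlus,
//	TokenNumber("2"), TokenTagClose, TokenEOF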
func (l *Lexer) Tokens() []Token {
	var tokens []Token
	for tok := range l.tokens {
		tokens = append(tokens, tok)
	}
	return tokens
}

// emit sends a token to the tokens channel.
func (l *Lexer) emit(t TokenType) {
	l.tokens <- Token{
		Type:  t,
		Value: l.input[l.start:l.pos],
		Pos:   l.currentPos(),
	}
	l.ignore()
}

// emitValue sends a token with a specific value.
func (l *Lexer) emitValue(t TokenType, value string) {
	l.tokens <- Token{
		Type:  t,
		Value: value,
		Pos:   l.currentPos(),
	}
	l.ignore()
}

// currentPos returns the position of the start of the current token for
// error reporting.
func (l *Lexer) currentPos() Position {
	return Position{
		Line:   l.startLine,
		Column: l.start - l.startLinePos + 1,
		Offset: l.start,
	}
}

// next returns the next rune in the input and advances the position.
func (l *Lexer) next() rune {
	if l.pos >= len(l.input) {
		l.width = 0
		return eof
	}
	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
	l.width = w
	l.pos += l.width
	if r == '\n' {
		l.line++
		l.linePos = l.pos
	}
	return r
}

// backup steps back one rune (can only be called once per call to next).
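// Backing up after reading eof is a no-op, because next leaves width at 0.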
func (l *Lexer) backup() {
	l.pos -= l.width
	// If we backed up over a newline, restore the previous line count
	// and find the previous line start
	if l.pos < len(l.input) && l.input[l.pos] == '\n' {
		l.line--
		l.linePos = strings.LastIndex(l.input[:l.pos], "\n") + 1
	}
}

// peek returns the next rune without advancing.
func (l *Lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}

// ignore skips over the pending input, marking the current position as
// the start of the next token.
func (l *Lexer) ignore() {
	l.start = l.pos
	l.startLine = l.line
	l.startLinePos = l.linePos
}

// errorf emits an error token and terminates scanning.
func (l *Lexer) errorf(format string, args ...any) stateFn {
	l.tokens <- Token{
		Type:  TokenError,
		Value: fmt.Sprintf(format, args...),
		Pos:   l.currentPos(),
	}
	return nil
}

// skipWhitespace advances past any whitespace characters.
func (l *Lexer) skipWhitespace() {
	for {
		r := l.next()
		if r == eof || !unicode.IsSpace(r) {
			l.backup()
			break
		}
	}
	l.ignore()
}

// hasPrefix reports whether the remaining input starts with the given prefix.
func (l *Lexer) hasPrefix(prefix string) bool {
	return strings.HasPrefix(l.input[l.pos:], prefix)
}

// ---- State Functions ----
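
// The states form a small machine:
//
//	lexText -> lexTagOpen -> lexInsideTag -> lexText
//	lexInsideTag -> lexString | lexNumber | lexIdentifier -> lexInsideTag
//
// Any state may instead return nil (via errorf, or at EOF) to stop run.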

// lexText scans text outside of tags until "[%" or EOF.
func lexText(l *Lexer) stateFn {
	for {
		if l.hasPrefix("[%") {
			if l.pos > l.start {
				l.emit(TokenText)
			}
			return lexTagOpen
		}
		if l.next() == eof {
			break
		}
	}
	// Emit any remaining text
	if l.pos > l.start {
		l.emit(TokenText)
	}
	l.emit(TokenEOF)
	return nil
}

// lexTagOpen scans the [% opening delimiter.
func lexTagOpen(l *Lexer) stateFn {
	l.pos += 2 // skip the two-byte "[%"
	l.emit(TokenTagOpen)
	return lexInsideTag
}

// lexInsideTag scans inside a [% ... %] tag.
func lexInsideTag(l *Lexer) stateFn {
	l.skipWhitespace()

	// Check for the closing tag
	if l.hasPrefix("%]") {
		l.pos += 2
		l.emit(TokenTagClose)
		return lexText
	}

	// Check for two-character operators before single-character ones,
	// so that ">=" is not scanned as ">" followed by "=".
	twoCharOps := []struct {
		str string
		tok TokenType
	}{
		{"==", TokenEq},
		{"!=", TokenNe},
		{">=", TokenGe},
		{"<=", TokenLe},
		{"&&", TokenAnd},
		{"||", TokenOr},
	}
	for _, op := range twoCharOps {
		if l.hasPrefix(op.str) {
			l.pos += 2
			l.emit(op.tok)
			return lexInsideTag
		}
	}

	// Check for single-character operators/delimiters
	r := l.next()
	switch r {
	case eof:
		return l.errorf("unclosed tag")
	case '>':
		l.emit(TokenGt)
		return lexInsideTag
	case '<':
		l.emit(TokenLt)
		return lexInsideTag
	case '+':
		l.emit(TokenPlus)
		return lexInsideTag
	case '-':
		// A '-' directly followed by a digit begins a negative number
		if unicode.IsDigit(l.peek()) {
			l.pos-- // un-read the one-byte '-' so lexNumber sees the sign
			return lexNumber
		}
		l.emit(TokenMinus)
		return lexInsideTag
	case '*':
		l.emit(TokenMul)
		return lexInsideTag
	case '/':
		l.emit(TokenDiv)
		return lexInsideTag
	case '%':
		// "%]" closes the tag; a lone '%' is the modulo operator
		if l.peek() == ']' {
			l.next() // consume the ']'
			l.emit(TokenTagClose)
			return lexText
		}
		l.emit(TokenMod)
		return lexInsideTag
	case '.':
		l.emit(TokenDot)
		return lexInsideTag
	case '|':
		l.emit(TokenPipe)
		return lexInsideTag
	case '(':
		l.emit(TokenLParen)
		return lexInsideTag
	case ')':
		l.emit(TokenRParen)
		return lexInsideTag
	case ',':
		l.emit(TokenComma)
		return lexInsideTag
	case '=':
		l.emit(TokenAssign)
		return lexInsideTag
	case '$':
		l.emit(TokenDollar)
		return lexInsideTag
	case '"', '\'':
		l.backup()
		return lexString
	}

	// Check for a number
	if unicode.IsDigit(r) {
		l.backup()
		return lexNumber
	}

	// Must be an identifier or keyword
	if isAlpha(r) || r == '_' {
		l.backup()
		return lexIdentifier
	}

	return l.errorf("unexpected character: %q", string(r))
}

// lexString scans a quoted string literal (single or double quotes).
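// The surrounding quotes are not included in the token value; escape
// sequences are kept verbatim for the consumer to interpret.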
func lexString(l *Lexer) stateFn {
	quote := l.next() // consume the opening quote
	l.ignore()        // don't include the quote in the value

	for {
		r := l.next()
		if r == eof {
			return l.errorf("unterminated string")
		}
		if r == '\\' {
			// Keep the escaped character without interpreting it
			l.next()
			continue
		}
		if r == quote {
			// Don't include the closing quote in the value
			l.backup()
			l.emit(TokenString)
			l.next() // consume the closing quote
			l.ignore()
			return lexInsideTag
		}
	}
}

// lexNumber scans a number (integer or float, possibly negative).
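// Accepted forms include 7, -7, and 3.14.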
func lexNumber(l *Lexer) stateFn {
	// Optional leading minus
	if l.peek() == '-' {
		l.next()
	}

	// Integer part
	digits := false
	for unicode.IsDigit(l.peek()) {
		l.next()
		digits = true
	}

	if !digits {
		return l.errorf("expected digits in number")
	}

	// Optional fractional part: consume the '.' only when a digit
	// follows, so "1.size" lexes as a number, a dot, and an identifier
	// rather than as "1." and "size".
	if l.peek() == '.' {
		dot := l.pos
		l.next()
		if unicode.IsDigit(l.peek()) {
			for unicode.IsDigit(l.peek()) {
				l.next()
			}
		} else {
			l.pos = dot // put the '.' back for lexInsideTag to emit
		}
	}

	l.emit(TokenNumber)
	return lexInsideTag
}

// lexIdentifier scans an identifier or keyword.
func lexIdentifier(l *Lexer) stateFn {
	for {
		r := l.next()
		if !isAlphaNumeric(r) && r != '_' {
			l.backup()
			break
		}
	}

	word := l.input[l.start:l.pos]
	tokType := LookupKeyword(word)
	l.emit(tokType)
	return lexInsideTag
}

// isAlpha reports whether r is an alphabetic character.
func isAlpha(r rune) bool {
	return unicode.IsLetter(r)
}

// isAlphaNumeric reports whether r is a letter or a digit.
func isAlphaNumeric(r rune) bool {
	return unicode.IsLetter(r) || unicode.IsDigit(r)
}