A Go template renderer based on Perl's Template Toolkit
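The lexer below scans Template Toolkit-style [% ... %] tags. For orientation, a few hypothetical inputs of the kind it is built to tokenize (directive keywords are resolved by LookupKeyword, which lives elsewhere in the package, so the exact keyword set is not shown here):

    Hello, [% name %]!
    [% price * 1.05 %]
    [% user.email | lower %]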
package gott

import (
	"strings"
	"unicode"
	"unicode/utf8"
)

const eof = -1

// Lexer tokenizes a template string into a sequence of tokens
type Lexer struct {
	input   string     // the string being scanned
	start   int        // start position of current token
	pos     int        // current position in input
	width   int        // width of last rune read
	line    int        // current line number (1-based)
	linePos int        // position of start of current line
	tokens  chan Token // channel of scanned tokens
}

// stateFn represents a lexer state function; returns the next state
type stateFn func(*Lexer) stateFn

// NewLexer creates a new lexer for the given input and starts scanning
func NewLexer(input string) *Lexer {
	l := &Lexer{
		input:   input,
		line:    1,
		linePos: 0,
		tokens:  make(chan Token, 2),
	}
	go l.run()
	return l
}

// run executes the state machine
func (l *Lexer) run() {
	for state := lexText; state != nil; {
		state = state(l)
	}
	close(l.tokens)
}

// NextToken returns the next token from the lexer
func (l *Lexer) NextToken() Token {
	return <-l.tokens
}

// Tokens returns all tokens as a slice (consumes the channel)
func (l *Lexer) Tokens() []Token {
	var tokens []Token
	for tok := range l.tokens {
		tokens = append(tokens, tok)
	}
	return tokens
}

// emit sends a token to the tokens channel
func (l *Lexer) emit(t TokenType) {
	l.tokens <- Token{
		Type:  t,
		Value: l.input[l.start:l.pos],
		Pos:   l.currentPos(),
	}
	l.start = l.pos
}

// emitValue sends a token with a specific value
func (l *Lexer) emitValue(t TokenType, value string) {
	l.tokens <- Token{
		Type:  t,
		Value: value,
		Pos:   l.currentPos(),
	}
	l.start = l.pos
}

// currentPos returns the current position for error reporting
func (l *Lexer) currentPos() Position {
	return Position{
		Line:   l.line,
		Column: l.start - l.linePos + 1,
		Offset: l.start,
	}
}

// next returns the next rune in the input and advances the position
func (l *Lexer) next() rune {
	if l.pos >= len(l.input) {
		l.width = 0
		return eof
	}
	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
	l.width = w
	l.pos += l.width
	if r == '\n' {
		l.line++
		l.linePos = l.pos
	}
	return r
}

// backup steps back one rune (can only be called once per next call)
func (l *Lexer) backup() {
	l.pos -= l.width
	// If we backed up over a newline, decrement line count
	if l.pos < len(l.input) && l.input[l.pos] == '\n' {
		l.line--
		// Find previous line start
		l.linePos = strings.LastIndex(l.input[:l.pos], "\n") + 1
	}
}

// peek returns the next rune without advancing
func (l *Lexer) peek() rune {
	r := l.next()
	l.backup()
	return r
}

// ignore skips over the pending input
func (l *Lexer) ignore() {
	l.start = l.pos
}

// errorf emits an error token and terminates scanning
func (l *Lexer) errorf(format string, args ...any) stateFn {
	l.tokens <- Token{
		Type:  TokenError,
		Value: sprintf(format, args...),
		Pos:   l.currentPos(),
	}
	return nil
}

// sprintf is a simple formatter (avoiding fmt import in hot path)
func sprintf(format string, args ...any) string {
	// Simple implementation - for errors only. Handles %s and %q with
	// string arguments; each argument fills the earliest remaining verb.
	result := format
	for _, arg := range args {
		s, ok := arg.(string)
		if !ok {
			continue
		}
		si := strings.Index(result, "%s")
		qi := strings.Index(result, "%q")
		switch {
		case si >= 0 && (qi < 0 || si < qi):
			result = strings.Replace(result, "%s", s, 1)
		case qi >= 0:
			result = strings.Replace(result, "%q", "'"+s+"'", 1)
		}
	}
	return result
}

// skipWhitespace advances past any whitespace characters
func (l *Lexer) skipWhitespace() {
	for {
		r := l.next()
		if r == eof || !unicode.IsSpace(r) {
			l.backup()
			break
		}
	}
	l.ignore()
}

// hasPrefix checks if the remaining input starts with the given prefix
func (l *Lexer) hasPrefix(prefix string) bool {
	return strings.HasPrefix(l.input[l.pos:], prefix)
}

// ---- State Functions ----

// lexText scans text outside of tags until [% or EOF
func lexText(l *Lexer) stateFn {
	for {
		if l.hasPrefix("[%") {
			if l.pos > l.start {
				l.emit(TokenText)
			}
			return lexTagOpen
		}
		if l.next() == eof {
			break
		}
	}
	// Emit any remaining text
	if l.pos > l.start {
		l.emit(TokenText)
	}
	l.emit(TokenEOF)
	return nil
}

// lexTagOpen scans the [% opening delimiter
func lexTagOpen(l *Lexer) stateFn {
	l.pos += 2 // skip [%
	l.emit(TokenTagOpen)
	return lexInsideTag
}

// lexInsideTag scans inside a [% ... %] tag
func lexInsideTag(l *Lexer) stateFn {
	l.skipWhitespace()

	// Check for closing tag
	if l.hasPrefix("%]") {
		l.pos += 2
		l.emit(TokenTagClose)
		return lexText
	}

	// Check for two-character operators first
	twoCharOps := []struct {
		str string
		tok TokenType
	}{
		{"==", TokenEq},
		{"!=", TokenNe},
		{">=", TokenGe},
		{"<=", TokenLe},
		{"&&", TokenAnd},
		{"||", TokenOr},
	}
	for _, op := range twoCharOps {
		if l.hasPrefix(op.str) {
			l.pos += 2
			l.emit(op.tok)
			return lexInsideTag
		}
	}

	// Check for single-character operators/delimiters
	r := l.next()
	switch r {
	case eof:
		return l.errorf("unclosed tag")
	case '>':
		l.emit(TokenGt)
		return lexInsideTag
	case '<':
		l.emit(TokenLt)
		return lexInsideTag
	case '+':
		l.emit(TokenPlus)
		return lexInsideTag
	case '-':
		// Could be minus or negative number
		if unicode.IsDigit(l.peek()) {
			l.backup()
			return lexNumber
		}
		l.emit(TokenMinus)
		return lexInsideTag
	case '*':
		l.emit(TokenMul)
		return lexInsideTag
	case '/':
		l.emit(TokenDiv)
		return lexInsideTag
	case '%':
		// Check if this is %] (tag close) or % (modulo); the closing tag
		// is normally caught before the switch, so this is a safety net
		if l.peek() == ']' {
			l.next() // consume ']' so the token value is "%]"
			l.emit(TokenTagClose)
			return lexText
		}
		l.emit(TokenMod)
		return lexInsideTag
	case '.':
		l.emit(TokenDot)
		return lexInsideTag
	case '|':
		l.emit(TokenPipe)
		return lexInsideTag
	case '(':
		l.emit(TokenLParen)
		return lexInsideTag
	case ')':
		l.emit(TokenRParen)
		return lexInsideTag
	case ',':
		l.emit(TokenComma)
		return lexInsideTag
	case '=':
		l.emit(TokenAssign)
		return lexInsideTag
	case '$':
		l.emit(TokenDollar)
		return lexInsideTag
	case '"', '\'':
		l.backup()
		return lexString
	}

	// Check for number
	if unicode.IsDigit(r) {
		l.backup()
		return lexNumber
	}

	// Must be identifier or keyword
	if isAlpha(r) || r == '_' {
		l.backup()
		return lexIdentifier
	}

	return l.errorf("unexpected character: %s", string(r))
}

// lexString scans a quoted string literal (single or double quotes)
func lexString(l *Lexer) stateFn {
	quote := l.next() // consume opening quote
	l.ignore()        // don't include quote in value

	for {
		r := l.next()
		if r == eof {
			return l.errorf("unterminated string")
		}
		if r == '\\' {
			// Skip escaped character (kept raw; unescaping is the parser's job)
			l.next()
			continue
		}
		if r == quote {
			// Don't include closing quote in value
			l.backup()
			l.emit(TokenString)
			l.next() // consume closing quote
			l.ignore()
			return lexInsideTag
		}
	}
}

// lexNumber scans a number (integer or float, possibly negative)
func lexNumber(l *Lexer) stateFn {
	// Optional leading minus
	if l.peek() == '-' {
		l.next()
	}

	// Integer part
	digits := false
	for unicode.IsDigit(l.peek()) {
		l.next()
		digits = true
	}

	if !digits {
		return l.errorf("expected digits in number")
	}

	// Optional decimal part: only consume the dot if a digit follows,
	// so input like 3.length still lexes as number, dot, identifier
	if l.peek() == '.' && l.pos+1 < len(l.input) &&
		l.input[l.pos+1] >= '0' && l.input[l.pos+1] <= '9' {
		l.next() // consume '.'
		for unicode.IsDigit(l.peek()) {
			l.next()
		}
	}

	l.emit(TokenNumber)
	return lexInsideTag
}

// lexIdentifier scans an identifier or keyword
func lexIdentifier(l *Lexer) stateFn {
	for {
		r := l.next()
		if !isAlphaNumeric(r) && r != '_' {
			l.backup()
			break
		}
	}

	word := l.input[l.start:l.pos]
	tokType := LookupKeyword(word)
	l.emit(tokType)
	return lexInsideTag
}

// isAlpha returns true if r is an alphabetic character
func isAlpha(r rune) bool {
	return unicode.IsLetter(r)
}

// isAlphaNumeric returns true if r is alphanumeric
func isAlphaNumeric(r rune) bool {
	return unicode.IsLetter(r) || unicode.IsDigit(r)
}