this repo has no description
at master 514 lines 18 kB view raw
1// Copyright 2024 The CUE Authors 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15// Package toml converts TOML to and from CUE. 16// 17// WARNING: THIS PACKAGE IS EXPERIMENTAL. 18// ITS API MAY CHANGE AT ANY TIME. 19package toml 20 21import ( 22 "fmt" 23 "io" 24 "maps" 25 "slices" 26 "strconv" 27 "strings" 28 "time" 29 30 toml "github.com/pelletier/go-toml/v2/unstable" 31 32 "cuelang.org/go/cue/ast" 33 "cuelang.org/go/cue/errors" 34 "cuelang.org/go/cue/literal" 35 "cuelang.org/go/cue/token" 36) 37 38// TODO(mvdan): schema and decode options 39 40// NewDecoder creates a decoder from a stream of TOML input. 41func NewDecoder(filename string, r io.Reader) *Decoder { 42 // Note that we don't consume the reader here, 43 // as there's no need, and we can't return an error either. 44 return &Decoder{r: r, filename: filename, seenTableKeys: make(map[string]bool)} 45} 46 47// Decoder implements the decoding state. 48// 49// Note that TOML files and streams never decode multiple CUE nodes; 50// subsequent calls to [Decoder.Decode] may return [io.EOF]. 51type Decoder struct { 52 r io.Reader 53 54 filename string 55 56 decoded bool // whether [Decoder.Decoded] has been called already 57 parser toml.Parser 58 59 // seenTableKeys tracks which rooted keys we have already decoded as tables, 60 // as duplicate table keys in TOML are not allowed. 61 seenTableKeys map[rootedKey]bool 62 63 // topFile is the top-level CUE file we are decoding into. 64 // TODO(mvdan): make an *ast.File once the decoder returns ast.Node rather than ast.Expr. 65 topFile *ast.StructLit 66 67 // tokenFile is used to create positions which can be used for error values and syntax tree nodes. 68 tokenFile *token.File 69 70 // openTableArrays keeps track of all the declared table arrays so that 71 // later headers can append a new table array element, or add a field 72 // to the last element in a table array. 73 // 74 // TODO(mvdan): an unsorted slice means we do two linear searches per header key. 75 // For N distinct `[[keys]]`, this means a decoding runtime of O(2*N*N). 76 // Consider either sorting this array so we can do a binary search for O(N*log2(N)), 77 // or perhaps a tree, although for a nesting level D, that could cause O(N*D), 78 // and a tree would use more slices and so more allocations. 79 // 80 // Note that a map is not a good option either, because even though it makes 81 // exact lookups cheap, prefix matches are still linear and relatively slow. 82 // A sorted slice allows both mechanisms to use a form of binary search. 83 openTableArrays []openTableArray 84 85 // currentTableKey is the rooted key for the current table where the following 86 // TOML `key = value` lines will be inserted. 87 currentTableKey rootedKey 88 89 // currentTable is the CUE struct literal for currentTableKey. 90 // It is nil before the first [header] or [[header]], 91 // in which case any key-values are inserted in topFile. 92 currentTable *ast.StructLit 93} 94 95// rootedKey is a dot-separated path from the root of the TOML document. 96// The string elements in between the dots may be quoted to avoid ambiguity. 97// For the time being, this is just an alias for the sake of documentation. 98// 99// A path into an array element is like "arr.3", 100// which looks very similar to a table's "tbl.key", 101// particularly since a table key can be any string. 102// However, we just need these keys to detect duplicates, 103// and a path cannot be both an array and table, so it's OK. 104type rootedKey = string 105 106// openTableArray records information about a declared table array. 107type openTableArray struct { 108 rkey rootedKey 109 level int // the level of nesting, 1 or higher, e.g. 2 for key="foo.bar" 110 list *ast.ListLit 111 lastTable *ast.StructLit 112} 113 114// TODO(mvdan): support decoding comments 115 116// Decode parses the input stream as TOML and converts it to a CUE [*ast.File]. 117// Because TOML files only contain a single top-level expression, 118// subsequent calls to this method may return [io.EOF]. 119func (d *Decoder) Decode() (ast.Expr, error) { 120 if d.decoded { 121 return nil, io.EOF 122 } 123 d.decoded = true 124 // TODO(mvdan): unfortunately go-toml does not support streaming as of v2.2.2. 125 data, err := io.ReadAll(d.r) 126 if err != nil { 127 return nil, err 128 } 129 d.tokenFile = token.NewFile(d.filename, 0, len(data)) 130 d.tokenFile.SetLinesForContent(data) 131 d.parser.Reset(data) 132 // Note that if the input is empty the result will be the same 133 // as for an empty table: an empty struct. 134 // The TOML spec and other decoders also work this way. 135 d.topFile = &ast.StructLit{} 136 for d.parser.NextExpression() { 137 if err := d.nextRootNode(d.parser.Expression()); err != nil { 138 return nil, err 139 } 140 } 141 if err := d.parser.Error(); err != nil { 142 if err, ok := err.(*toml.ParserError); ok { 143 shape := d.parser.Shape(d.parser.Range(err.Highlight)) 144 return nil, d.posErrf(shape.Start, "%s", err.Message) 145 } 146 return nil, err 147 } 148 return d.topFile, nil 149} 150 151func (d *Decoder) shape(tnode *toml.Node) toml.Shape { 152 if tnode.Raw.Length == 0 { 153 // Otherwise the Shape method call below happily returns a position like 1:1, 154 // which is worse than no position information as it confuses the user. 155 panic("Decoder.nodePos was given an empty toml.Node as position") 156 } 157 return d.parser.Shape(tnode.Raw) 158} 159 160func (d *Decoder) nodeErrf(tnode *toml.Node, format string, args ...any) error { 161 return d.posErrf(d.shape(tnode).Start, format, args...) 162} 163 164func (d *Decoder) posErrf(pos toml.Position, format string, args ...any) error { 165 return errors.Newf(d.tokenFile.Pos(pos.Offset, token.NoRelPos), format, args...) 166} 167 168// nextRootNode is called for every top-level expression from the TOML parser. 169// 170// This method does not return a syntax tree node directly, 171// because some kinds of top-level expressions like comments and table headers 172// require recording some state in the decoder to produce a node at a later time. 173func (d *Decoder) nextRootNode(tnode *toml.Node) error { 174 switch tnode.Kind { 175 // Key-Values in TOML are in the form of: 176 // 177 // foo.title = "Foo" 178 // foo.bar.baz = "value" 179 // 180 // We decode them as "inline" structs in CUE, which keeps the original shape: 181 // 182 // foo: title: "Foo" 183 // foo: bar: baz: "value" 184 // 185 // An alternative would be to join struct literals, which avoids some repetition, 186 // but also introduces extra lines and may break some comment positions: 187 // 188 // foo: { 189 // title: "Foo" 190 // bar: baz: "value" 191 // } 192 case toml.KeyValue: 193 // Top-level fields begin a new line. 194 field, err := d.decodeField(d.currentTableKey, tnode, token.Newline) 195 if err != nil { 196 return err 197 } 198 if d.currentTable != nil { 199 d.currentTable.Elts = append(d.currentTable.Elts, field) 200 } else { 201 d.topFile.Elts = append(d.topFile.Elts, field) 202 } 203 204 case toml.Table: 205 // Tables always begin a new line. 206 key, keyElems := d.decodeKey("", tnode.Key()) 207 208 // All table keys must be unique, including for the top-level table. 209 if d.seenTableKeys[key] { 210 return d.nodeErrf(tnode.Child(), "duplicate key: %s", key) 211 } 212 d.seenTableKeys[key] = true 213 214 // We want a multi-line struct with curly braces, 215 // just like TOML's tables are on multiple lines. 216 d.currentTable = &ast.StructLit{ 217 // No positions, as TOML doesn't have table delimiters. 218 Lbrace: token.NoPos.WithRel(token.Blank), 219 Rbrace: token.NoPos.WithRel(token.Newline), 220 } 221 array := d.findArrayPrefix(key) 222 if array != nil { // [last_array.new_table] 223 if array.rkey == key { 224 return d.nodeErrf(tnode.Child(), "cannot redeclare table array %q as a table", key) 225 } 226 subKeyElems := keyElems[array.level:] 227 topField, leafField := d.inlineFields(subKeyElems, token.Newline) 228 array.lastTable.Elts = append(array.lastTable.Elts, topField) 229 leafField.Value = d.currentTable 230 } else { // [new_table] 231 topField, leafField := d.inlineFields(keyElems, token.Newline) 232 d.topFile.Elts = append(d.topFile.Elts, topField) 233 leafField.Value = d.currentTable 234 } 235 d.currentTableKey = key 236 237 case toml.ArrayTable: 238 // Table array elements always begin a new line. 239 key, keyElems := d.decodeKey("", tnode.Key()) 240 if d.seenTableKeys[key] { 241 return d.nodeErrf(tnode.Child(), "cannot redeclare key %q as a table array", key) 242 } 243 // Each struct inside a table array sits on separate lines. 244 d.currentTable = &ast.StructLit{ 245 // No positions, as TOML doesn't have table delimiters. 246 Lbrace: token.NoPos.WithRel(token.Newline), 247 Rbrace: token.NoPos.WithRel(token.Newline), 248 } 249 if array := d.findArrayPrefix(key); array != nil && array.level == len(keyElems) { 250 // [[last_array]] - appending to an existing array. 251 d.currentTableKey = key + "." + strconv.Itoa(len(array.list.Elts)) 252 array.lastTable = d.currentTable 253 array.list.Elts = append(array.list.Elts, d.currentTable) 254 } else { 255 // Creating a new array via either [[new_array]] or [[last_array.new_array]]. 256 // We want a multi-line list with square braces, 257 // since TOML's table arrays are on multiple lines. 258 list := &ast.ListLit{ 259 // No positions, as TOML doesn't have array table delimiters. 260 Lbrack: token.NoPos.WithRel(token.Blank), 261 Rbrack: token.NoPos.WithRel(token.Newline), 262 } 263 if array == nil { 264 // [[new_array]] - at the top level 265 topField, leafField := d.inlineFields(keyElems, token.Newline) 266 d.topFile.Elts = append(d.topFile.Elts, topField) 267 leafField.Value = list 268 } else { 269 // [[last_array.new_array]] - on the last array element 270 subKeyElems := keyElems[array.level:] 271 topField, leafField := d.inlineFields(subKeyElems, token.Newline) 272 array.lastTable.Elts = append(array.lastTable.Elts, topField) 273 leafField.Value = list 274 } 275 276 d.currentTableKey = key + ".0" 277 list.Elts = append(list.Elts, d.currentTable) 278 d.openTableArrays = append(d.openTableArrays, openTableArray{ 279 rkey: key, 280 level: len(keyElems), 281 list: list, 282 lastTable: d.currentTable, 283 }) 284 } 285 286 default: 287 return fmt.Errorf("encoding/toml.Decoder.nextRootNode: unknown %s %#v", tnode.Kind, tnode) 288 } 289 return nil 290} 291 292// decodeField decodes a single table key and its value as a struct field. 293func (d *Decoder) decodeField(rkey rootedKey, tnode *toml.Node, relPos token.RelPos) (*ast.Field, error) { 294 rkey, keyElems := d.decodeKey(rkey, tnode.Key()) 295 if d.findArray(rkey) != nil { 296 return nil, d.nodeErrf(tnode.Child().Next(), "cannot redeclare table array %q as a table", rkey) 297 } 298 topField, leafField := d.inlineFields(keyElems, relPos) 299 // All table keys must be unique, including inner table ones. 300 if d.seenTableKeys[rkey] { 301 return nil, d.nodeErrf(tnode.Child().Next(), "duplicate key: %s", rkey) 302 } 303 d.seenTableKeys[rkey] = true 304 value, err := d.decodeExpr(rkey, tnode.Value()) 305 if err != nil { 306 return nil, err 307 } 308 leafField.Value = value 309 return topField, nil 310} 311 312// findArray returns an existing table array if one exists at exactly the given key. 313func (d *Decoder) findArray(rkey rootedKey) *openTableArray { 314 for i, arr := range d.openTableArrays { 315 if arr.rkey == rkey { 316 return &d.openTableArrays[i] 317 } 318 } 319 return nil 320} 321 322// findArray returns an existing table array if one exists at exactly the given key 323// or as a prefix to the given key. 324func (d *Decoder) findArrayPrefix(rkey rootedKey) *openTableArray { 325 // TODO(mvdan): see the performance TODO on [Decoder.openTableArrays]. 326 327 // Prefer an exact match over a relative prefix match. 328 if arr := d.findArray(rkey); arr != nil { 329 // TODO: the fact that we need to delete from both structures below 330 // strongly hints towards merging the two structures in some way. 331 // We already have a TODO about making openTableArrays a more efficient structure. 332 333 // When we find an exact match, we must forget about its subkeys 334 // because we're starting an entirely new array element. 335 d.openTableArrays = slices.DeleteFunc(d.openTableArrays, func(arr openTableArray) bool { 336 return strings.HasPrefix(arr.rkey, rkey+".") 337 }) 338 // We also need to forget about seen table keys. 339 maps.DeleteFunc(d.seenTableKeys, func(seenRkey rootedKey, _ bool) bool { 340 return strings.HasPrefix(seenRkey, rkey+".") 341 }) 342 return arr 343 } 344 // The longest relative key match wins. 345 maxLevel := 0 346 var maxLevelArr *openTableArray 347 for i, arr := range d.openTableArrays { 348 if strings.HasPrefix(rkey, arr.rkey+".") && arr.level > maxLevel { 349 maxLevel = arr.level 350 maxLevelArr = &d.openTableArrays[i] 351 } 352 } 353 if maxLevel > 0 { 354 return maxLevelArr 355 } 356 return nil 357} 358 359// tomlKey represents a name with a position which forms part of a TOML dotted key, 360// such as "foo" from "[foo.bar.baz]". 361type tomlKey struct { 362 name string 363 shape toml.Shape 364} 365 366// decodeKey extracts a rootedKey from a TOML node key iterator, 367// appending to the given parent key and returning the unquoted string elements. 368func (d *Decoder) decodeKey(rkey rootedKey, iter toml.Iterator) (rootedKey, []tomlKey) { 369 var elems []tomlKey 370 for iter.Next() { 371 node := iter.Node() 372 name := string(node.Data) 373 // TODO(mvdan): use an append-like API once we have benchmarks 374 if len(rkey) > 0 { 375 rkey += "." 376 } 377 rkey += quoteLabelIfNeeded(name) 378 elems = append(elems, tomlKey{name, d.shape(node)}) 379 } 380 return rkey, elems 381} 382 383// inlineFields constructs a single-line chain of CUE fields joined with structs, 384// so that an input like: 385// 386// ["foo", "bar.baz", "zzz"] 387// 388// results in the CUE fields: 389// 390// foo: "bar.baz": zzz: <nil> 391// 392// The "top" field, in this case "foo", can then be added as an element to a struct. 393// The "leaf" field, in this case "zzz", leaves its value as nil to be filled out. 394func (d *Decoder) inlineFields(tkeys []tomlKey, relPos token.RelPos) (top, leaf *ast.Field) { 395 curField := &ast.Field{ 396 Label: d.label(tkeys[0], relPos), 397 } 398 399 topField := curField 400 for _, tkey := range tkeys[1:] { 401 nextField := &ast.Field{ 402 Label: d.label(tkey, token.Blank), // on the same line 403 } 404 curField.Value = &ast.StructLit{Elts: []ast.Decl{nextField}} 405 curField = nextField 406 } 407 return topField, curField 408} 409 410// quoteLabelIfNeeded quotes a label name only if it needs quoting. 411func quoteLabelIfNeeded(name string) string { 412 if ast.StringLabelNeedsQuoting(name) { 413 return literal.Label.Quote(name) 414 } 415 return name 416} 417 418// label creates an ast.Label that represents a key with exactly the literal string name. 419// This means a quoted string literal for the key "_", as TOML never means "top", 420// as well as for any keys beginning with an underscore, as we don't want to hide any fields. 421// cue/format knows how to quote any other identifiers correctly. 422func (d *Decoder) label(tkey tomlKey, relPos token.RelPos) ast.Label { 423 pos := d.tokenFile.Pos(tkey.shape.Start.Offset, relPos) 424 label := ast.NewStringLabel(tkey.name) 425 ast.SetPos(label, pos) 426 return label 427} 428 429// decodeExpr decodes a single TOML value expression, found on the right side 430// of a `key = value` line. 431func (d *Decoder) decodeExpr(rkey rootedKey, tnode *toml.Node) (ast.Expr, error) { 432 // TODO(mvdan): we currently assume that TOML basic literals (string, int, float) 433 // are also valid CUE literals; we should double check this, perhaps via fuzzing. 434 data := string(tnode.Data) 435 var expr ast.Expr 436 switch tnode.Kind { 437 case toml.String: 438 expr = ast.NewString(data) 439 case toml.Integer: 440 expr = ast.NewLit(token.INT, data) 441 case toml.Float: 442 expr = ast.NewLit(token.FLOAT, data) 443 case toml.Bool: 444 expr = ast.NewBool(data == "true") 445 case toml.Array: 446 list := &ast.ListLit{} 447 elems := tnode.Children() 448 for elems.Next() { 449 key := rkey + "." + strconv.Itoa(len(list.Elts)) 450 elem, err := d.decodeExpr(key, elems.Node()) 451 if err != nil { 452 return nil, err 453 } 454 list.Elts = append(list.Elts, elem) 455 } 456 expr = list 457 case toml.InlineTable: 458 strct := &ast.StructLit{ 459 // We want a single-line struct, just like TOML's inline tables are on a single line. 460 Lbrace: token.NoPos.WithRel(token.Blank), 461 Rbrace: token.NoPos.WithRel(token.Blank), 462 } 463 elems := tnode.Children() 464 for elems.Next() { 465 // Inline table fields are on the same line. 466 field, err := d.decodeField(rkey, elems.Node(), token.Blank) 467 if err != nil { 468 return nil, err 469 } 470 strct.Elts = append(strct.Elts, field) 471 } 472 expr = strct 473 case toml.LocalDate, toml.LocalTime, toml.LocalDateTime, toml.DateTime: 474 // CUE does not have native date nor time literal kinds, 475 // so we decode these as strings exactly as they came in 476 // and we validate them with time.Format using the corresponding format string. 477 // Not only does this ensure that the resulting CUE can be used with our time package, 478 // but it also means that we can roundtrip a TOML timestamp without confusing it for a string. 479 var format ast.Expr 480 switch tnode.Kind { 481 case toml.LocalDate: 482 // TODO(mvdan): rename time.RFC3339Date to time.DateOnly to mirror Go 483 format = ast.NewSel(&ast.Ident{ 484 Name: "time", 485 Node: ast.NewImport(nil, "time"), 486 }, "RFC3339Date") 487 case toml.LocalTime: 488 // TODO(mvdan): add TimeOnly to CUE's time package to mirror Go 489 format = ast.NewString(time.TimeOnly) 490 case toml.LocalDateTime: 491 // RFC3339 minus the timezone; this seems like a format peculiar to TOML. 492 format = ast.NewString("2006-01-02T15:04:05") 493 default: // DateTime 494 format = ast.NewSel(&ast.Ident{ 495 Name: "time", 496 Node: ast.NewImport(nil, "time"), 497 }, "RFC3339") 498 } 499 expr = ast.NewBinExpr(token.AND, ast.NewString(data), ast.NewCall( 500 ast.NewSel(&ast.Ident{ 501 Name: "time", 502 Node: ast.NewImport(nil, "time"), 503 }, "Format"), format), 504 ) 505 default: 506 return nil, fmt.Errorf("encoding/toml.Decoder.decodeExpr: unknown %s %#v", tnode.Kind, tnode) 507 } 508 // TODO(mvdan): some go-toml nodes such as Kind=toml.Bool do not seem to have a Raw Range 509 // which would let us grab their position information; fix this upstream. 510 if tnode.Raw.Length > 0 { 511 ast.SetPos(expr, d.tokenFile.Pos(d.shape(tnode).Start.Offset, token.NoRelPos)) 512 } 513 return expr, nil 514}