this repo has no description
at master 1226 lines 35 kB view raw
1// Copyright 2019 CUE Authors 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15package jsonschema 16 17// TODO: 18// - replace converter from YAML to CUE to CUE (schema) to CUE. 19// - define OpenAPI definitions als CUE. 20 21import ( 22 "fmt" 23 "math" 24 "net/url" 25 "regexp" 26 "regexp/syntax" 27 "slices" 28 "strconv" 29 "strings" 30 31 "cuelang.org/go/cue" 32 "cuelang.org/go/cue/ast" 33 "cuelang.org/go/cue/ast/astutil" 34 "cuelang.org/go/cue/errors" 35 "cuelang.org/go/cue/token" 36 "cuelang.org/go/internal" 37) 38 39const ( 40 // DefaultRootID is used as the absolute base URI for a schema 41 // when no value is provided in [Config.ID]. 42 DefaultRootID = "https://" + DefaultRootIDHost 43 DefaultRootIDHost = "cue.jsonschema.invalid" 44) 45 46// rootDefs defines the top-level name of the map of definitions that do not 47// have a valid identifier name. 48// 49// TODO: find something more principled, like allowing #("a-b"). 50const rootDefs = "#" 51 52// A decoder converts JSON schema to CUE. 53type decoder struct { 54 cfg *Config 55 errs errors.Error 56 mapURLErrors map[string]bool 57 58 root cue.Value 59 rootID *url.URL 60 61 // defForValue holds an entry for internal values 62 // that are known to map to a defined schema. 63 // A nil entry is stored for nodes that have been 64 // referred to but we haven't yet seen when walking 65 // the schemas. 66 defForValue *valueMap[*definedSchema] 67 68 // danglingRefs records the number of nil entries in defForValue, 69 // representing the number of references into the internal 70 // structure that have not yet been resolved. 71 danglingRefs int 72 73 // defs holds the set of named schemas, indexed by URI (both 74 // canonical, and root-relative if known), including external 75 // schemas that aren't known. 76 defs map[string]*definedSchema 77 78 // builder is used to build the final syntax tree as it becomes known. 79 builder structBuilder 80 81 // needAnotherPass is set to true when we know that 82 // we need another pass through the schema extraction 83 // process. This can happen because `MapRef` might choose 84 // a different location depending on whether a reference is local 85 // or external. We don't know that until we've traversed the 86 // entire schema and the `$ref` might be seen before the 87 // schema it's referring to. Still more passes might be required 88 // if a $ref is found to be referring to a node that would not normally 89 // be considered part of the schema data. 90 needAnotherPass bool 91} 92 93// definedSchema records information for a schema or subschema. 94type definedSchema struct { 95 // importPath is empty for internal schemas. 96 importPath string 97 98 // path holds the location of the schema relative to importPath. 99 path cue.Path 100 101 // schema holds the actual syntax for the schema. This 102 // is nil if the entry was created by a reference only. 103 schema ast.Expr 104 105 // comment holds any doc comment associated with the above schema. 106 comment *ast.CommentGroup 107} 108 109// addImport registers 110func (d *decoder) addImport(n cue.Value, pkg string) *ast.Ident { 111 spec := ast.NewImport(nil, pkg) 112 info, err := astutil.ParseImportSpec(spec) 113 if err != nil { 114 d.errf(cue.Value{}, "invalid import %q", pkg) 115 } 116 ident := ast.NewIdent(info.Ident) 117 ident.Node = spec 118 ast.SetPos(ident, n.Pos()) 119 120 return ident 121} 122 123func (d *decoder) decode(v cue.Value) *ast.File { 124 var defsRoot cue.Value 125 // docRoot represents the root of the actual data, by contrast 126 // with the "root" value as specified in [Config.Root] which 127 // represents the root of the schemas to be decoded. 128 docRoot := v 129 if d.cfg.Root != "" { 130 rootPath, err := parseRootRef(d.cfg.Root) 131 if err != nil { 132 d.errf(cue.Value{}, "invalid Config.Root value %q: %v", d.cfg.Root, err) 133 return nil 134 } 135 root := v.LookupPath(rootPath) 136 if !root.Exists() && !d.cfg.AllowNonExistentRoot { 137 d.errf(v, "root value at path %v does not exist", d.cfg.Root) 138 return nil 139 } 140 if d.cfg.SingleRoot { 141 v = root 142 } else { 143 if !root.Exists() { 144 root = v.Context().CompileString("{}") 145 } 146 if root.Kind() != cue.StructKind { 147 d.errf(root, "value at path %v must be struct containing definitions but is actually %v", d.cfg.Root, root) 148 return nil 149 } 150 defsRoot = root 151 } 152 } 153 154 var rootInfo schemaInfo 155 // extraSchemas records any nodes that are referred to 156 // but not part of the regular schema traversal. 157 var extraSchemas []cue.Value 158 // basePass records the last time that any new schemas were 159 // added for inspection. This can be set whenever new schemas 160 // not part of the regular traversal are found. 161 basePass := 0 162 163 for pass := 0; ; pass++ { 164 if pass > 10 { 165 // Should never happen: the most we should ever see in practice 166 // should be 2, but some pathological cases could end up with more. 167 d.errf(v, "internal error: too many passes without resolution") 168 return nil 169 } 170 root := &state{ 171 decoder: d, 172 schemaInfo: schemaInfo{ 173 schemaVersion: d.cfg.DefaultVersion, 174 id: d.rootID, 175 }, 176 isRoot: true, 177 pos: docRoot, 178 } 179 180 if defsRoot.Exists() { 181 // When d.cfg.Root is non-empty, it points to a struct 182 // containing a field for each definition. 183 constraintAddDefinitions("schemas", defsRoot, root) 184 } else { 185 expr, state := root.schemaState(v, allTypes, func(s *state) { 186 // We want the top level state to be treated as root even 187 // though it's some levels below the actual document top level. 188 s.isRoot = true 189 }) 190 if state.allowedTypes == 0 { 191 root.errf(v, "constraints are not possible to satisfy") 192 return nil 193 } 194 if !d.builder.put(cue.Path{}, expr, state.comment()) { 195 root.errf(v, "duplicate definition at root") // TODO better error message 196 return nil 197 } 198 rootInfo = state 199 } 200 if d.danglingRefs > 0 && pass == basePass+1 { 201 // There are still dangling references but we've been through the 202 // schema twice, so we know that there's a reference 203 // to a non-schema node. Technically this is not necessarily valid, 204 // but we do see this in the wild. This should be rare, 205 // so efficiency (re-parsing paths) shouldn't be a great issue. 206 for path, def := range d.defForValue.byPath { 207 if def != nil { 208 continue 209 } 210 n := d.root.LookupPath(cue.ParsePath(path)) 211 if !n.Exists() { 212 panic("failed to find entry for dangling reference") 213 } 214 extraSchemas = append(extraSchemas, n) 215 basePass = pass 216 } 217 } 218 for _, n := range extraSchemas { 219 // As the ID namespace isn't well-defined we treat all such 220 // schemas as if they were directly under the root. 221 // See https://json-schema.org/draft/2020-12/json-schema-core#section-9.4.2 222 root.schema(n) 223 } 224 if !d.needAnotherPass && d.danglingRefs == 0 { 225 break 226 } 227 228 d.builder = structBuilder{} 229 for _, def := range d.defs { 230 def.schema = nil 231 } 232 d.needAnotherPass = false 233 } 234 if d.cfg.DefineSchema != nil { 235 // Let the caller know about any internal schemas that 236 // have been mapped to an external location. 237 for _, def := range d.defs { 238 if def.schema != nil && def.importPath != "" { 239 d.cfg.DefineSchema(def.importPath, def.path, def.schema, def.comment) 240 } 241 } 242 } 243 f, err := d.builder.syntax() 244 if err != nil { 245 d.errf(v, "cannot build final syntax: %v", err) 246 return nil 247 } 248 var preamble []ast.Decl 249 if d.cfg.PkgName != "" { 250 preamble = append(preamble, &ast.Package{Name: ast.NewIdent(d.cfg.PkgName)}) 251 } 252 if rootInfo.schemaVersionPresent { 253 // TODO use cue/literal.String 254 // TODO is this actually useful information: why is knowing the schema 255 // version of the input useful? 256 preamble = append(preamble, &ast.Attribute{ 257 Text: fmt.Sprintf("@jsonschema(schema=%q)", rootInfo.schemaVersion), 258 }) 259 } 260 if rootInfo.deprecated { 261 preamble = append(preamble, &ast.Attribute{Text: "@deprecated()"}) 262 } 263 if len(preamble) > 0 { 264 f.Decls = append(preamble, f.Decls...) 265 } 266 return f 267} 268 269func (d *decoder) errf(n cue.Value, format string, args ...interface{}) ast.Expr { 270 d.warnf(n.Pos(), format, args...) 271 return &ast.BadExpr{From: n.Pos()} 272} 273 274func (d *decoder) warnf(p token.Pos, format string, args ...interface{}) { 275 d.addErr(errors.Newf(p, format, args...)) 276} 277 278func (d *decoder) addErr(err errors.Error) { 279 d.errs = errors.Append(d.errs, err) 280} 281 282func (d *decoder) number(n cue.Value) ast.Expr { 283 return n.Syntax(cue.Final()).(ast.Expr) 284} 285 286func (d *decoder) uint(nv cue.Value) ast.Expr { 287 n, err := uint64Value(nv) 288 if err != nil { 289 d.errf(nv, "invalid uint") 290 } 291 return &ast.BasicLit{ 292 ValuePos: nv.Pos(), 293 Kind: token.FLOAT, 294 Value: strconv.FormatUint(n, 10), 295 } 296} 297 298func (d *decoder) boolValue(n cue.Value) bool { 299 x, err := n.Bool() 300 if err != nil { 301 d.errf(n, "invalid bool") 302 } 303 return x 304} 305 306func (d *decoder) string(n cue.Value) ast.Expr { 307 return n.Syntax(cue.Final()).(ast.Expr) 308} 309 310func (d *decoder) strValue(n cue.Value) (s string, ok bool) { 311 s, err := n.String() 312 if err != nil { 313 d.errf(n, "invalid string") 314 return "", false 315 } 316 return s, true 317} 318 319func (d *decoder) regexpValue(n cue.Value) (ast.Expr, bool) { 320 s, ok := d.strValue(n) 321 if !ok { 322 return nil, false 323 } 324 if !d.checkRegexp(n, s) { 325 return nil, false 326 } 327 return d.string(n), true 328} 329 330func (d *decoder) checkRegexp(n cue.Value, s string) bool { 331 _, err := syntax.Parse(s, syntax.Perl) 332 if err == nil { 333 return true 334 } 335 var regErr *syntax.Error 336 if errors.As(err, &regErr) { 337 switch regErr.Code { 338 case syntax.ErrInvalidPerlOp: 339 // It's Perl syntax that we'll never support because the CUE evaluation 340 // engine uses Go's regexp implementation and because the missing 341 // features are usually not there for good reason (e.g. exponential 342 // runtime). In other words, this is a missing feature but not an invalid 343 // regular expression as such. 344 if d.cfg.StrictFeatures { 345 // TODO: could fall back to https://github.com/dlclark/regexp2 instead 346 d.errf(n, "unsupported Perl regexp syntax in %q: %v", s, err) 347 } 348 return false 349 case syntax.ErrInvalidCharRange: 350 // There are many more character class ranges than Go supports currently 351 // (see https://go.dev/issue/14509) so treat an unknown character class 352 // range as a feature error rather than a bad regexp. 353 // TODO translate names to Go-supported class names when possible. 354 if d.cfg.StrictFeatures { 355 d.errf(n, "unsupported regexp character class in %q: %v", s, err) 356 } 357 return false 358 } 359 } 360 d.errf(n, "invalid regexp %q: %v", s, err) 361 return false 362} 363 364// ensureDefinition ensures that node n will 365// be a defined schema. 366func (d *decoder) ensureDefinition(n cue.Value) { 367 if _, ok := d.defForValue.lookup(n); !ok { 368 d.defForValue.set(n, nil) 369 d.danglingRefs++ 370 } 371} 372 373// const draftCutoff = 5 374 375type coreType int 376 377const ( 378 nullType coreType = iota 379 boolType 380 numType 381 stringType 382 arrayType 383 objectType 384 385 numCoreTypes 386) 387 388var coreToCUE = []cue.Kind{ 389 nullType: cue.NullKind, 390 boolType: cue.BoolKind, 391 numType: cue.NumberKind, // Note: both int and float. 392 stringType: cue.StringKind, 393 arrayType: cue.ListKind, 394 objectType: cue.StructKind, 395} 396 397func kindToAST(k cue.Kind, explicitOpen bool) ast.Expr { 398 switch k { 399 case cue.NullKind: 400 // TODO: handle OpenAPI restrictions. 401 return ast.NewNull() 402 case cue.BoolKind: 403 return ast.NewIdent("bool") 404 case cue.NumberKind: 405 return ast.NewIdent("number") 406 case cue.IntKind: 407 return ast.NewIdent("int") 408 case cue.FloatKind: 409 return ast.NewIdent("float") 410 case cue.StringKind: 411 return ast.NewIdent("string") 412 case cue.ListKind: 413 return ast.NewList(&ast.Ellipsis{}) 414 case cue.StructKind: 415 if explicitOpen { 416 return ast.NewStruct() 417 } 418 return ast.NewStruct(&ast.Ellipsis{}) 419 } 420 panic(fmt.Errorf("unexpected kind %v", k)) 421} 422 423var coreTypeName = []string{ 424 nullType: "null", 425 boolType: "bool", 426 numType: "number", 427 stringType: "string", 428 arrayType: "array", 429 objectType: "object", 430} 431 432type constraintInfo struct { 433 // typ is an identifier for the root type, if present. 434 // This can be omitted if there are constraints. 435 typ ast.Expr 436 constraints []ast.Expr 437} 438 439func (c *constraintInfo) setTypeUsed(n cue.Value, t coreType, explicitOpen bool) { 440 c.typ = kindToAST(coreToCUE[t], explicitOpen) 441 setPos(c.typ, n) 442 ast.SetRelPos(c.typ, token.NoRelPos) 443} 444 445func (c *constraintInfo) add(n cue.Value, x ast.Expr) { 446 if !isTop(x) { 447 setPos(x, n) 448 ast.SetRelPos(x, token.NoRelPos) 449 c.constraints = append(c.constraints, x) 450 } 451} 452 453func (s *state) add(n cue.Value, t coreType, x ast.Expr) { 454 s.types[t].add(n, x) 455} 456 457func (s *state) setTypeUsed(n cue.Value, t coreType) { 458 if int(t) >= len(s.types) { 459 panic(fmt.Errorf("type out of range %v/%v", int(t), len(s.types))) 460 } 461 s.types[t].setTypeUsed(n, t, s.cfg.OpenOnlyWhenExplicit) 462} 463 464type state struct { 465 *decoder 466 schemaInfo 467 468 up *state 469 470 pos cue.Value 471 472 // The constraints in types represent disjunctions per type. 473 types [numCoreTypes]constraintInfo 474 all constraintInfo // values and oneOf etc. 475 nullable *ast.BasicLit // nullable 476 477 exclusiveMin bool // For OpenAPI and legacy support. 478 exclusiveMax bool // For OpenAPI and legacy support. 479 480 // isRoot holds whether this state is at the root 481 // of the schema. 482 isRoot bool 483 484 minContains *uint64 485 maxContains *uint64 486 487 ifConstraint cue.Value 488 thenConstraint cue.Value 489 elseConstraint cue.Value 490 491 definitions []ast.Decl 492 493 // Used for inserting definitions, properties, etc. 494 obj *ast.StructLit 495 objN cue.Value // used for adding obj to constraints 496 497 patterns []ast.Expr 498 499 list *ast.ListLit 500 501 // listItemsIsArray keeps track of whether the 502 // value of the "items" keyword is an array. 503 // Without this, we can't distinguish between 504 // 505 // "items": true 506 // 507 // and 508 // 509 // "items": [] 510 listItemsIsArray bool 511 512 // The following fields are used when the version is 513 // [VersionKubernetesCRD] to check that "properties" and 514 // "additionalProperties" may not be specified together. 515 hasProperties bool 516 hasAdditionalProperties bool 517 518 // Keep track of whether "items" and "type": "array" have been specified, because 519 // in OpenAPI it's mandatory when "type" is "array". 520 hasItems bool 521 isArray bool 522 523 // Keep track of whether a $ref keyword is present, 524 // because pre-2019-09 schemas ignore sibling keywords 525 // to $ref. 526 hasRefKeyword bool 527 528 // Keep track of whether we're preserving existing fields, 529 // which is preserved recursively by default, and is 530 // reset within properties or additionalProperties. 531 preserveUnknownFields bool 532 533 // k8sResourceKind and k8sAPIVersion record values from the 534 // x-kubernetes-group-version-kind keyword 535 // for the kind and apiVersion properties respectively. 536 k8sResourceKind string 537 k8sAPIVersion string 538 539 // Keep track of whether the object has been explicitly 540 // closed or opened (see [Config.OpenOnlyWhenExplicit]). 541 openness openness 542} 543 544type openness int 545 546const ( 547 implicitlyOpen openness = iota 548 explicitlyOpen // explicitly opened, e.g. additionalProperties: true 549 explicitlyClosed // explicitly closed, e.g. additionalProperties: false 550 allFieldsCovered // complete pattern present, e.g. additionalProperties: type: string 551) 552 553// schemaInfo holds information about a schema 554// after it has been created. 555type schemaInfo struct { 556 // allowedTypes holds the set of types that 557 // this node is allowed to be. 558 allowedTypes cue.Kind 559 560 // knownTypes holds the set of types that this node 561 // is known to be one of by virtue of the constraints inside 562 // all. This is used to avoid adding redundant elements 563 // to the disjunction created by [state.finalize]. 564 knownTypes cue.Kind 565 566 title string 567 description string 568 569 // id holds the absolute URI of the schema if has a $id field . 570 // It's the base URI for $ref or nested $id fields. 571 id *url.URL 572 deprecated bool 573 574 schemaVersion Version 575 schemaVersionPresent bool 576 577 hasConstraints bool 578} 579 580func (s *state) idTag() *ast.Attribute { 581 return &ast.Attribute{Text: fmt.Sprintf("@jsonschema(id=%q)", s.id)} 582} 583 584func (s *state) object(n cue.Value) *ast.StructLit { 585 if s.obj == nil { 586 s.obj = &ast.StructLit{} 587 s.objN = n 588 } 589 return s.obj 590} 591 592func (s *state) finalizeObject() { 593 if s.obj == nil && s.schemaVersion == VersionKubernetesCRD && (s.allowedTypes&cue.StructKind) != 0 && s.preserveUnknownFields { 594 // When x-kubernetes-preserve-unknown-fields is set, we need 595 // an explicit ellipsis even though kindToAST won't have added 596 // one, so make sure there's an object. 597 _ = s.object(s.pos) 598 } 599 if s.obj == nil { 600 return 601 } 602 if s.preserveUnknownFields { 603 s.openness = explicitlyOpen 604 } 605 var e ast.Expr = s.obj 606 if s.cfg.OpenOnlyWhenExplicit && s.openness == implicitlyOpen { 607 // Nothing to do: the struct is implicitly open but 608 // we've been directed to leave it like that. 609 } else if s.openness == allFieldsCovered { 610 // Nothing to do: there is a pattern constraint that covers all 611 // possible fields. 612 } else if s.openness == explicitlyClosed { 613 e = ast.NewCall(ast.NewIdent("close"), s.obj) 614 } else { 615 s.obj.Elts = append(s.obj.Elts, &ast.Ellipsis{}) 616 } 617 s.add(s.objN, objectType, e) 618} 619 620func (s *state) hasConstraints() bool { 621 if len(s.all.constraints) > 0 { 622 return true 623 } 624 for _, t := range s.types { 625 if len(t.constraints) > 0 { 626 return true 627 } 628 } 629 return len(s.patterns) > 0 || 630 s.title != "" || 631 s.description != "" || 632 s.obj != nil || 633 s.id != nil 634} 635 636const allTypes = cue.BoolKind | 637 cue.ListKind | 638 cue.NullKind | 639 cue.NumberKind | 640 cue.IntKind | 641 cue.StringKind | 642 cue.StructKind 643 644// finalize constructs CUE syntax from the collected constraints. 645func (s *state) finalize() (e ast.Expr) { 646 if s.allowedTypes == 0 { 647 // Nothing is possible. This isn't a necessarily a problem, as 648 // we might be inside an allOf or oneOf with other valid constraints. 649 return errorDisallowed() 650 } 651 652 s.finalizeObject() 653 654 conjuncts := []ast.Expr{} 655 disjuncts := []ast.Expr{} 656 657 // Sort literal structs and list last for nicer formatting. 658 // Use a stable sort so that the relative order of constraints 659 // is otherwise kept as-is, for the sake of deterministic output. 660 slices.SortStableFunc(s.types[arrayType].constraints, func(a, b ast.Expr) int { 661 _, aList := a.(*ast.ListLit) 662 _, bList := b.(*ast.ListLit) 663 return cmpBool(aList, bList) 664 }) 665 slices.SortStableFunc(s.types[objectType].constraints, func(a, b ast.Expr) int { 666 _, aStruct := a.(*ast.StructLit) 667 _, bStruct := b.(*ast.StructLit) 668 return cmpBool(aStruct, bStruct) 669 }) 670 671 type excludeInfo struct { 672 pos token.Pos 673 typIndex int 674 } 675 var excluded []excludeInfo 676 677 needsTypeDisjunction := s.allowedTypes != s.knownTypes 678 if !needsTypeDisjunction { 679 for i, t := range s.types { 680 k := coreToCUE[i] 681 if len(t.constraints) > 0 && s.allowedTypes&k != 0 { 682 // We need to include at least one type-specific 683 // constraint in the disjunction. 684 needsTypeDisjunction = true 685 break 686 } 687 } 688 } 689 690 if needsTypeDisjunction { 691 npossible := 0 692 nexcluded := 0 693 for i, t := range s.types { 694 k := coreToCUE[i] 695 allowed := s.allowedTypes&k != 0 696 switch { 697 case len(t.constraints) > 0: 698 npossible++ 699 if !allowed { 700 nexcluded++ 701 for _, c := range t.constraints { 702 excluded = append(excluded, excludeInfo{c.Pos(), i}) 703 } 704 continue 705 } 706 x := ast.NewBinExpr(token.AND, t.constraints...) 707 disjuncts = append(disjuncts, x) 708 case allowed: 709 npossible++ 710 if s.knownTypes&k != 0 { 711 disjuncts = append(disjuncts, kindToAST(k, s.cfg.OpenOnlyWhenExplicit)) 712 } 713 } 714 } 715 if nexcluded == npossible { 716 // All possibilities have been excluded: this is an impossible 717 // schema. 718 for _, e := range excluded { 719 s.addErr(errors.Newf(e.pos, 720 "constraint not allowed because type %s is excluded", 721 coreTypeName[e.typIndex], 722 )) 723 } 724 } 725 } 726 conjuncts = append(conjuncts, s.all.constraints...) 727 728 if len(disjuncts) > 0 { 729 conjuncts = append(conjuncts, ast.NewBinExpr(token.OR, disjuncts...)) 730 } 731 732 if len(conjuncts) == 0 { 733 // There are no conjuncts, which can only happen when there 734 // are no disjuncts, which can only happen when the entire 735 // set of disjuncts is redundant with respect to the types 736 // already implied by s.all. As we've already checked that 737 // s.allowedTypes is non-zero (so we know that 738 // it's not bottom) and we need _some_ expression 739 // to be part of the subequent syntax, we use top. 740 e = top() 741 } else { 742 e = ast.NewBinExpr(token.AND, conjuncts...) 743 } 744 745 a := []ast.Expr{e} 746 if s.nullable != nil { 747 a = []ast.Expr{s.nullable, e} 748 } 749 750 e = ast.NewBinExpr(token.OR, a...) 751 752 if len(s.definitions) > 0 { 753 if st, ok := e.(*ast.StructLit); ok { 754 st.Elts = append(st.Elts, s.definitions...) 755 } else { 756 st = ast.NewStruct() 757 st.Elts = append(st.Elts, &ast.EmbedDecl{Expr: e}) 758 st.Elts = append(st.Elts, s.definitions...) 759 e = st 760 } 761 } 762 763 // If an "$id" exists, make sure it's present in the output. 764 if s.id != nil { 765 if st, ok := e.(*ast.StructLit); ok { 766 st.Elts = append([]ast.Decl{s.idTag()}, st.Elts...) 767 } else { 768 e = &ast.StructLit{Elts: []ast.Decl{s.idTag(), &ast.EmbedDecl{Expr: e}}} 769 } 770 } 771 772 // Now that we've expressed the schema as actual syntax, 773 // all the allowed types are actually explicit and will not 774 // need to be mentioned again. 775 s.knownTypes = s.allowedTypes 776 return e 777} 778 779// cmpBool returns 780// 781// -1 if x is less than y, 782// 0 if x equals y, 783// +1 if x is greater than y, 784// 785// where false is ordered before true. 786func cmpBool(x, y bool) int { 787 switch { 788 case !x && y: 789 return -1 790 case x && !y: 791 return +1 792 default: 793 return 0 794 } 795} 796 797func (s schemaInfo) comment() *ast.CommentGroup { 798 // Create documentation. 799 doc := strings.TrimSpace(s.title) 800 if s.description != "" { 801 if doc != "" { 802 doc += "\n\n" 803 } 804 doc += s.description 805 doc = strings.TrimSpace(doc) 806 } 807 // TODO: add examples as well? 808 if doc == "" { 809 return nil 810 } 811 return internal.NewComment(true, doc) 812} 813 814func (s *state) schema(n cue.Value) ast.Expr { 815 expr, _ := s.schemaState(n, allTypes, nil) 816 return expr 817} 818 819// schemaState returns a new state value derived from s. 820// n holds the JSONSchema node to translate to a schema. 821// types holds the set of possible types that the value can hold. 822// 823// If init is not nil, it is called on the newly created state value 824// before doing anything else. 825func (s0 *state) schemaState(n cue.Value, types cue.Kind, init func(*state)) (expr ast.Expr, info schemaInfo) { 826 s := &state{ 827 up: s0, 828 schemaInfo: schemaInfo{ 829 schemaVersion: s0.schemaVersion, 830 allowedTypes: types, 831 knownTypes: allTypes, 832 }, 833 decoder: s0.decoder, 834 pos: n, 835 isRoot: s0.isRoot && n == s0.pos, 836 preserveUnknownFields: s0.preserveUnknownFields, 837 } 838 if init != nil { 839 init(s) 840 } 841 defer func() { 842 // Perhaps replace the schema expression with a reference. 843 expr = s.maybeDefine(expr, info) 844 }() 845 if n.Kind() == cue.BoolKind { 846 if s.schemaVersion.is(vfrom(VersionDraft6)) { 847 // From draft6 onwards, boolean values signify a schema that always passes or fails. 848 // TODO if false, set s.allowedTypes and s.knownTypes to zero? 849 return boolSchema(s.boolValue(n)), s.schemaInfo 850 } 851 return s.errf(n, "boolean schemas not supported in %v", s.schemaVersion), s.schemaInfo 852 } 853 if n.Kind() != cue.StructKind { 854 return s.errf(n, "schema expects mapping node, found %s", n.Kind()), s.schemaInfo 855 } 856 857 // do multiple passes over the constraints to ensure they are done in order. 858 for pass := 0; pass < numPhases; pass++ { 859 s.processMap(n, func(key string, value cue.Value) { 860 if pass == 0 && key == "$ref" { 861 // Before 2019-19, keywords alongside $ref are ignored so keep 862 // track of whether we've seen any non-$ref keywords so we can 863 // ignore those keywords. This could apply even when the schema 864 // is >=2019-19 because $schema could be used to change the version. 865 s.hasRefKeyword = true 866 } 867 // Convert each constraint into a either a value or a functor. 868 c := constraintMap[key] 869 if c == nil { 870 if strings.HasPrefix(key, "x-") { 871 // A keyword starting with a leading x- is clearly 872 // not intended to be a valid keyword, and is explicitly 873 // allowed by OpenAPI. It seems reasonable that 874 // this is not an error even with StrictKeywords enabled. 875 return 876 } 877 if pass == 0 && s.cfg.StrictKeywords { 878 // TODO: value is not the correct position, albeit close. Fix this. 879 s.warnUnrecognizedKeyword(key, value, "unknown keyword %q", key) 880 } 881 return 882 } 883 if c.phase != pass { 884 return 885 } 886 if !s.schemaVersion.is(c.versions) { 887 s.warnUnrecognizedKeyword(key, value, "keyword %q is not supported in JSON schema version %v", key, s.schemaVersion) 888 return 889 } 890 if pass > 0 && !s.schemaVersion.is(vfrom(VersionDraft2019_09)) && s.hasRefKeyword && key != "$ref" { 891 // We're using a schema version that ignores keywords alongside $ref. 892 // 893 // Note that we specifically exclude pass 0 (the pass in which $schema is checked) 894 // from this check, because hasRefKeyword is only set in pass 0 and we 895 // can get into a self-contradictory situation ($schema says we should 896 // ignore keywords alongside $ref, but $ref says we should ignore the $schema 897 // keyword itself). We could make that situation an explicit error, but other 898 // implementations don't, and it would require an entire extra pass just to do so. 899 s.warnUnrecognizedKeyword(key, value, "ignoring keyword %q alongside $ref", key) 900 return 901 } 902 c.fn(key, value, s) 903 }) 904 if s.schemaVersion == VersionKubernetesCRD && s.isRoot { 905 // The root of a CRD is always a resource, so treat it as if it contained 906 // the x-kubernetes-embedded-resource keyword 907 // TODO remove this behavior now that we have an explicit 908 // ExtractCRDs function which does a better job at doing this. 909 c := constraintMap["x-kubernetes-embedded-resource"] 910 if c.phase != pass { 911 continue 912 } 913 // Note: there is no field value for the embedded-resource keyword, 914 // but it's not actually used except for its position so passing 915 // the parent object should work fine. 916 c.fn("x-kubernetes-embedded-resource", n, s) 917 } 918 } 919 if s.id != nil { 920 // If there's an ID, it can be referred to. 921 s.ensureDefinition(s.pos) 922 } 923 constraintIfThenElse(s) 924 if s.schemaVersion == VersionKubernetesCRD { 925 if s.hasProperties && s.hasAdditionalProperties { 926 s.errf(n, "additionalProperties may not be combined with properties in %v", s.schemaVersion) 927 } 928 } 929 if s.schemaVersion.is(openAPILike) { 930 if s.isArray && !s.hasItems { 931 // From https://github.com/OAI/OpenAPI-Specification/blob/3.0.0/versions/3.0.0.md#schema-object 932 // "`items` MUST be present if the `type` is `array`." 933 s.errf(n, `"items" must be present when the "type" is "array" in %v`, s.schemaVersion) 934 } 935 } 936 937 schemaExpr := s.finalize() 938 s.schemaInfo.hasConstraints = s.hasConstraints() 939 return schemaExpr, s.schemaInfo 940} 941 942func (s *state) warnUnrecognizedKeyword(key string, n cue.Value, msg string, args ...any) { 943 if !s.cfg.StrictKeywords { 944 return 945 } 946 if s.schemaVersion.is(openAPILike) && strings.HasPrefix(key, "x-") { 947 // Unimplemented x- keywords are allowed even with strict keywords 948 // under OpenAPI-like versions, because those versions enable 949 // strict keywords by default. 950 return 951 } 952 s.errf(n, msg, args...) 953} 954 955// maybeDefine checks whether we might need a definition 956// for n given its actual schema syntax expression. If 957// it does, it creates the definition as appropriate and returns 958// an expression that refers to that definition; if not, 959// it just returns expr itself. 960// TODO also report whether the schema has been defined at a place 961// where it can be unified with something else? 962func (s *state) maybeDefine(expr ast.Expr, info schemaInfo) ast.Expr { 963 def := s.definedSchemaForNode(s.pos) 964 if def == nil || len(def.path.Selectors()) == 0 { 965 return expr 966 } 967 def.schema = expr 968 def.comment = info.comment() 969 if def.importPath == "" { 970 // It's a local definition that's not at the root. 971 if !s.builder.put(def.path, expr, s.comment()) { 972 s.errf(s.pos, "redefinition of schema CUE path %v", def.path) 973 return expr 974 } 975 } 976 return s.refExpr(s.pos, def.importPath, def.path) 977} 978 979// definedSchemaForNode returns the definedSchema value 980// for the given node in the JSON schema, or nil 981// if the node does not need a definition. 982func (s *state) definedSchemaForNode(n cue.Value) *definedSchema { 983 def, ok := s.defForValue.lookup(n) 984 if !ok { 985 return nil 986 } 987 if def != nil { 988 // We've either made a definition in a previous pass 989 // or it's a redefinition. 990 // TODO if it's a redefinition, error. 991 return def 992 } 993 // This node has been referred to but not actually defined. We'll 994 // need another pass to sort out the reference even though the 995 // reference is no longer dangling. 996 s.needAnotherPass = true 997 998 def = s.addDefinition(n) 999 if def == nil { 1000 return nil 1001 } 1002 s.defForValue.set(n, def) 1003 s.danglingRefs-- 1004 return def 1005} 1006 1007func (s *state) addDefinition(n cue.Value) *definedSchema { 1008 var loc SchemaLoc 1009 schemaRoot := s.schemaRoot() 1010 loc.ID = ref(*schemaRoot.id) 1011 loc.ID.Fragment = mustCUEPathToJSONPointer(relPath(n, schemaRoot.pos)) 1012 idStr := loc.ID.String() 1013 def, ok := s.defs[idStr] 1014 if ok { 1015 // We've already got a definition for this ID. 1016 // TODO if it's been defined in the same pass, then it's a redefinition 1017 // s.errf(n, "redefinition of schema %s at %v", idStr, n.Path()) 1018 return def 1019 } 1020 loc.IsLocal = true 1021 loc.Path = relPath(n, s.root) 1022 importPath, path, err := s.cfg.MapRef(loc) 1023 if err != nil { 1024 s.errf(n, "cannot get reference for %v: %v", loc, err) 1025 return nil 1026 } 1027 def = &definedSchema{ 1028 importPath: importPath, 1029 path: path, 1030 } 1031 s.defs[idStr] = def 1032 return def 1033} 1034 1035// refExpr returns a CUE expression to refer to the given path within the given 1036// imported CUE package. If importPath is empty, it returns a reference 1037// relative to the root of the schema being generated. 1038func (s *state) refExpr(n cue.Value, importPath string, path cue.Path) ast.Expr { 1039 if importPath == "" { 1040 // Internal reference 1041 expr, err := s.builder.getRef(path) 1042 if err != nil { 1043 s.errf(n, "cannot generate reference: %v", err) 1044 return nil 1045 } 1046 return expr 1047 } 1048 // External reference 1049 ip := ast.ParseImportPath(importPath) 1050 if ip.Qualifier == "" { 1051 // TODO choose an arbitrary name here. 1052 s.errf(n, "cannot determine package name from import path %q", importPath) 1053 return nil 1054 } 1055 ident := ast.NewIdent(ip.Qualifier) 1056 ident.Node = &ast.ImportSpec{Path: ast.NewString(importPath)} 1057 expr, err := pathRefSyntax(path, ident) 1058 if err != nil { 1059 s.errf(n, "cannot determine CUE path: %v", err) 1060 return nil 1061 } 1062 return expr 1063} 1064 1065func (s *state) constValue(n cue.Value) ast.Expr { 1066 k := n.Kind() 1067 switch k { 1068 case cue.ListKind: 1069 a := []ast.Expr{} 1070 for i, _ := n.List(); i.Next(); { 1071 a = append(a, s.constValue(i.Value())) 1072 } 1073 return setPos(ast.NewList(a...), n) 1074 1075 case cue.StructKind: 1076 a := []ast.Decl{} 1077 s.processMap(n, func(key string, n cue.Value) { 1078 a = append(a, &ast.Field{ 1079 Label: ast.NewString(key), 1080 Value: s.constValue(n), 1081 Constraint: token.NOT, 1082 }) 1083 }) 1084 return setPos(ast.NewCall(ast.NewIdent("close"), &ast.StructLit{Elts: a}), n) 1085 default: 1086 if !n.IsConcrete() { 1087 s.errf(n, "invalid non-concrete value") 1088 } 1089 return n.Syntax(cue.Final()).(ast.Expr) 1090 } 1091} 1092 1093// processMap processes a yaml node, expanding merges. 1094// 1095// TODO: in some cases we can translate merges into CUE embeddings. 1096// This may also prevent exponential blow-up (as may happen when 1097// converting YAML to JSON). 1098func (s *state) processMap(n cue.Value, f func(key string, n cue.Value)) { 1099 // TODO: intercept references to allow for optimized performance. 1100 for i, _ := n.Fields(); i.Next(); { 1101 f(i.Selector().Unquoted(), i.Value()) 1102 } 1103} 1104 1105func (s *state) listItems(name string, n cue.Value, allowEmpty bool) (a []cue.Value) { 1106 if n.Kind() != cue.ListKind { 1107 s.errf(n, `value of %q must be an array, found %v`, name, n.Kind()) 1108 } 1109 for i, _ := n.List(); i.Next(); { 1110 a = append(a, i.Value()) 1111 } 1112 if !allowEmpty && len(a) == 0 { 1113 s.errf(n, `array for %q must be non-empty`, name) 1114 } 1115 return a 1116} 1117 1118// excludeFields returns either an empty slice (if decls is empty) 1119// or a slice containing a CUE expression that can be used to exclude the 1120// fields of the given declaration in a label expression. For instance, for 1121// 1122// { foo: 1, bar: int } 1123// 1124// it creates a slice holding the expression 1125// 1126// !~ "^(foo|bar)$" 1127// 1128// which can be used in a label expression to define types for all fields but 1129// those existing: 1130// 1131// [!~"^(foo|bar)$"]: string 1132func excludeFields(decls []ast.Decl) []ast.Expr { 1133 if len(decls) == 0 { 1134 return nil 1135 } 1136 var buf strings.Builder 1137 first := true 1138 buf.WriteString("^(") 1139 for _, d := range decls { 1140 f, ok := d.(*ast.Field) 1141 if !ok { 1142 continue 1143 } 1144 str, _, _ := ast.LabelName(f.Label) 1145 if str != "" { 1146 if !first { 1147 buf.WriteByte('|') 1148 } 1149 buf.WriteString(regexp.QuoteMeta(str)) 1150 first = false 1151 } 1152 } 1153 buf.WriteString(")$") 1154 return []ast.Expr{ 1155 &ast.UnaryExpr{Op: token.NMAT, X: ast.NewString(buf.String())}, 1156 } 1157} 1158 1159func errorDisallowed() ast.Expr { 1160 return ast.NewCall(ast.NewIdent("error"), ast.NewString("disallowed")) 1161} 1162 1163func isErrorCall(e ast.Expr) bool { 1164 call, ok := e.(*ast.CallExpr) 1165 if !ok { 1166 return false 1167 } 1168 target, ok := call.Fun.(*ast.Ident) 1169 if !ok { 1170 return false 1171 } 1172 return target.Name == "error" 1173} 1174 1175func top() ast.Expr { 1176 return ast.NewIdent("_") 1177} 1178 1179func boolSchema(ok bool) ast.Expr { 1180 if ok { 1181 return top() 1182 } 1183 return errorDisallowed() 1184} 1185 1186func isTop(s ast.Expr) bool { 1187 i, ok := s.(*ast.Ident) 1188 return ok && i.Name == "_" 1189} 1190 1191func addTag(field ast.Label, tag, value string) *ast.Field { 1192 return &ast.Field{ 1193 Label: field, 1194 Value: top(), 1195 Attrs: []*ast.Attribute{ 1196 {Text: fmt.Sprintf("@%s(%s)", tag, value)}, 1197 }, 1198 } 1199} 1200 1201func setPos(e ast.Expr, v cue.Value) ast.Expr { 1202 ast.SetPos(e, v.Pos()) 1203 return e 1204} 1205 1206// uint64Value is like v.Uint64 except that it 1207// also allows floating point constants, as long 1208// as they have no fractional part. 1209func uint64Value(v cue.Value) (uint64, error) { 1210 n, err := v.Uint64() 1211 if err == nil { 1212 return n, nil 1213 } 1214 f, err := v.Float64() 1215 if err != nil { 1216 return 0, err 1217 } 1218 intPart, fracPart := math.Modf(f) 1219 if fracPart != 0 { 1220 return 0, errors.Newf(v.Pos(), "%v is not a whole number", v) 1221 } 1222 if intPart < 0 || intPart > math.MaxUint64 { 1223 return 0, errors.Newf(v.Pos(), "%v is out of bounds", v) 1224 } 1225 return uint64(intPart), nil 1226}