1// Copyright 2019 CUE Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15package jsonschema
16
17// TODO:
18// - replace converter from YAML to CUE to CUE (schema) to CUE.
// - define OpenAPI definitions as CUE.
20
21import (
22 "fmt"
23 "math"
24 "net/url"
25 "regexp"
26 "regexp/syntax"
27 "slices"
28 "strconv"
29 "strings"
30
31 "cuelang.org/go/cue"
32 "cuelang.org/go/cue/ast"
33 "cuelang.org/go/cue/ast/astutil"
34 "cuelang.org/go/cue/errors"
35 "cuelang.org/go/cue/token"
36 "cuelang.org/go/internal"
37)
38
// Defaults for the base URI of a schema.
const (
	// DefaultRootIDHost is the host part of [DefaultRootID].
	DefaultRootIDHost = "cue.jsonschema.invalid"

	// DefaultRootID is the absolute base URI assumed for a schema
	// when [Config.ID] has not been set.
	DefaultRootID = "https://" + DefaultRootIDHost
)
45
// rootDefs is the top-level name of the map that holds definitions
// whose names are not valid identifiers.
//
// TODO: find something more principled, like allowing #("a-b").
const rootDefs = "#"
51
52// A decoder converts JSON schema to CUE.
53type decoder struct {
54 cfg *Config
55 errs errors.Error
56 mapURLErrors map[string]bool
57
58 root cue.Value
59 rootID *url.URL
60
61 // defForValue holds an entry for internal values
62 // that are known to map to a defined schema.
63 // A nil entry is stored for nodes that have been
64 // referred to but we haven't yet seen when walking
65 // the schemas.
66 defForValue *valueMap[*definedSchema]
67
68 // danglingRefs records the number of nil entries in defForValue,
69 // representing the number of references into the internal
70 // structure that have not yet been resolved.
71 danglingRefs int
72
73 // defs holds the set of named schemas, indexed by URI (both
74 // canonical, and root-relative if known), including external
75 // schemas that aren't known.
76 defs map[string]*definedSchema
77
78 // builder is used to build the final syntax tree as it becomes known.
79 builder structBuilder
80
81 // needAnotherPass is set to true when we know that
82 // we need another pass through the schema extraction
83 // process. This can happen because `MapRef` might choose
84 // a different location depending on whether a reference is local
85 // or external. We don't know that until we've traversed the
86 // entire schema and the `$ref` might be seen before the
87 // schema it's referring to. Still more passes might be required
88 // if a $ref is found to be referring to a node that would not normally
89 // be considered part of the schema data.
90 needAnotherPass bool
91}
92
93// definedSchema records information for a schema or subschema.
94type definedSchema struct {
95 // importPath is empty for internal schemas.
96 importPath string
97
98 // path holds the location of the schema relative to importPath.
99 path cue.Path
100
101 // schema holds the actual syntax for the schema. This
102 // is nil if the entry was created by a reference only.
103 schema ast.Expr
104
105 // comment holds any doc comment associated with the above schema.
106 comment *ast.CommentGroup
107}
108
109// addImport registers
110func (d *decoder) addImport(n cue.Value, pkg string) *ast.Ident {
111 spec := ast.NewImport(nil, pkg)
112 info, err := astutil.ParseImportSpec(spec)
113 if err != nil {
114 d.errf(cue.Value{}, "invalid import %q", pkg)
115 }
116 ident := ast.NewIdent(info.Ident)
117 ident.Node = spec
118 ast.SetPos(ident, n.Pos())
119
120 return ident
121}
122
123func (d *decoder) decode(v cue.Value) *ast.File {
124 var defsRoot cue.Value
125 // docRoot represents the root of the actual data, by contrast
126 // with the "root" value as specified in [Config.Root] which
127 // represents the root of the schemas to be decoded.
128 docRoot := v
129 if d.cfg.Root != "" {
130 rootPath, err := parseRootRef(d.cfg.Root)
131 if err != nil {
132 d.errf(cue.Value{}, "invalid Config.Root value %q: %v", d.cfg.Root, err)
133 return nil
134 }
135 root := v.LookupPath(rootPath)
136 if !root.Exists() && !d.cfg.AllowNonExistentRoot {
137 d.errf(v, "root value at path %v does not exist", d.cfg.Root)
138 return nil
139 }
140 if d.cfg.SingleRoot {
141 v = root
142 } else {
143 if !root.Exists() {
144 root = v.Context().CompileString("{}")
145 }
146 if root.Kind() != cue.StructKind {
147 d.errf(root, "value at path %v must be struct containing definitions but is actually %v", d.cfg.Root, root)
148 return nil
149 }
150 defsRoot = root
151 }
152 }
153
154 var rootInfo schemaInfo
155 // extraSchemas records any nodes that are referred to
156 // but not part of the regular schema traversal.
157 var extraSchemas []cue.Value
158 // basePass records the last time that any new schemas were
159 // added for inspection. This can be set whenever new schemas
160 // not part of the regular traversal are found.
161 basePass := 0
162
163 for pass := 0; ; pass++ {
164 if pass > 10 {
165 // Should never happen: the most we should ever see in practice
166 // should be 2, but some pathological cases could end up with more.
167 d.errf(v, "internal error: too many passes without resolution")
168 return nil
169 }
170 root := &state{
171 decoder: d,
172 schemaInfo: schemaInfo{
173 schemaVersion: d.cfg.DefaultVersion,
174 id: d.rootID,
175 },
176 isRoot: true,
177 pos: docRoot,
178 }
179
180 if defsRoot.Exists() {
181 // When d.cfg.Root is non-empty, it points to a struct
182 // containing a field for each definition.
183 constraintAddDefinitions("schemas", defsRoot, root)
184 } else {
185 expr, state := root.schemaState(v, allTypes, func(s *state) {
186 // We want the top level state to be treated as root even
187 // though it's some levels below the actual document top level.
188 s.isRoot = true
189 })
190 if state.allowedTypes == 0 {
191 root.errf(v, "constraints are not possible to satisfy")
192 return nil
193 }
194 if !d.builder.put(cue.Path{}, expr, state.comment()) {
195 root.errf(v, "duplicate definition at root") // TODO better error message
196 return nil
197 }
198 rootInfo = state
199 }
200 if d.danglingRefs > 0 && pass == basePass+1 {
201 // There are still dangling references but we've been through the
202 // schema twice, so we know that there's a reference
203 // to a non-schema node. Technically this is not necessarily valid,
204 // but we do see this in the wild. This should be rare,
205 // so efficiency (re-parsing paths) shouldn't be a great issue.
206 for path, def := range d.defForValue.byPath {
207 if def != nil {
208 continue
209 }
210 n := d.root.LookupPath(cue.ParsePath(path))
211 if !n.Exists() {
212 panic("failed to find entry for dangling reference")
213 }
214 extraSchemas = append(extraSchemas, n)
215 basePass = pass
216 }
217 }
218 for _, n := range extraSchemas {
219 // As the ID namespace isn't well-defined we treat all such
220 // schemas as if they were directly under the root.
221 // See https://json-schema.org/draft/2020-12/json-schema-core#section-9.4.2
222 root.schema(n)
223 }
224 if !d.needAnotherPass && d.danglingRefs == 0 {
225 break
226 }
227
228 d.builder = structBuilder{}
229 for _, def := range d.defs {
230 def.schema = nil
231 }
232 d.needAnotherPass = false
233 }
234 if d.cfg.DefineSchema != nil {
235 // Let the caller know about any internal schemas that
236 // have been mapped to an external location.
237 for _, def := range d.defs {
238 if def.schema != nil && def.importPath != "" {
239 d.cfg.DefineSchema(def.importPath, def.path, def.schema, def.comment)
240 }
241 }
242 }
243 f, err := d.builder.syntax()
244 if err != nil {
245 d.errf(v, "cannot build final syntax: %v", err)
246 return nil
247 }
248 var preamble []ast.Decl
249 if d.cfg.PkgName != "" {
250 preamble = append(preamble, &ast.Package{Name: ast.NewIdent(d.cfg.PkgName)})
251 }
252 if rootInfo.schemaVersionPresent {
253 // TODO use cue/literal.String
254 // TODO is this actually useful information: why is knowing the schema
255 // version of the input useful?
256 preamble = append(preamble, &ast.Attribute{
257 Text: fmt.Sprintf("@jsonschema(schema=%q)", rootInfo.schemaVersion),
258 })
259 }
260 if rootInfo.deprecated {
261 preamble = append(preamble, &ast.Attribute{Text: "@deprecated()"})
262 }
263 if len(preamble) > 0 {
264 f.Decls = append(preamble, f.Decls...)
265 }
266 return f
267}
268
269func (d *decoder) errf(n cue.Value, format string, args ...interface{}) ast.Expr {
270 d.warnf(n.Pos(), format, args...)
271 return &ast.BadExpr{From: n.Pos()}
272}
273
274func (d *decoder) warnf(p token.Pos, format string, args ...interface{}) {
275 d.addErr(errors.Newf(p, format, args...))
276}
277
278func (d *decoder) addErr(err errors.Error) {
279 d.errs = errors.Append(d.errs, err)
280}
281
282func (d *decoder) number(n cue.Value) ast.Expr {
283 return n.Syntax(cue.Final()).(ast.Expr)
284}
285
286func (d *decoder) uint(nv cue.Value) ast.Expr {
287 n, err := uint64Value(nv)
288 if err != nil {
289 d.errf(nv, "invalid uint")
290 }
291 return &ast.BasicLit{
292 ValuePos: nv.Pos(),
293 Kind: token.FLOAT,
294 Value: strconv.FormatUint(n, 10),
295 }
296}
297
298func (d *decoder) boolValue(n cue.Value) bool {
299 x, err := n.Bool()
300 if err != nil {
301 d.errf(n, "invalid bool")
302 }
303 return x
304}
305
306func (d *decoder) string(n cue.Value) ast.Expr {
307 return n.Syntax(cue.Final()).(ast.Expr)
308}
309
310func (d *decoder) strValue(n cue.Value) (s string, ok bool) {
311 s, err := n.String()
312 if err != nil {
313 d.errf(n, "invalid string")
314 return "", false
315 }
316 return s, true
317}
318
319func (d *decoder) regexpValue(n cue.Value) (ast.Expr, bool) {
320 s, ok := d.strValue(n)
321 if !ok {
322 return nil, false
323 }
324 if !d.checkRegexp(n, s) {
325 return nil, false
326 }
327 return d.string(n), true
328}
329
330func (d *decoder) checkRegexp(n cue.Value, s string) bool {
331 _, err := syntax.Parse(s, syntax.Perl)
332 if err == nil {
333 return true
334 }
335 var regErr *syntax.Error
336 if errors.As(err, ®Err) {
337 switch regErr.Code {
338 case syntax.ErrInvalidPerlOp:
339 // It's Perl syntax that we'll never support because the CUE evaluation
340 // engine uses Go's regexp implementation and because the missing
341 // features are usually not there for good reason (e.g. exponential
342 // runtime). In other words, this is a missing feature but not an invalid
343 // regular expression as such.
344 if d.cfg.StrictFeatures {
345 // TODO: could fall back to https://github.com/dlclark/regexp2 instead
346 d.errf(n, "unsupported Perl regexp syntax in %q: %v", s, err)
347 }
348 return false
349 case syntax.ErrInvalidCharRange:
350 // There are many more character class ranges than Go supports currently
351 // (see https://go.dev/issue/14509) so treat an unknown character class
352 // range as a feature error rather than a bad regexp.
353 // TODO translate names to Go-supported class names when possible.
354 if d.cfg.StrictFeatures {
355 d.errf(n, "unsupported regexp character class in %q: %v", s, err)
356 }
357 return false
358 }
359 }
360 d.errf(n, "invalid regexp %q: %v", s, err)
361 return false
362}
363
364// ensureDefinition ensures that node n will
365// be a defined schema.
366func (d *decoder) ensureDefinition(n cue.Value) {
367 if _, ok := d.defForValue.lookup(n); !ok {
368 d.defForValue.set(n, nil)
369 d.danglingRefs++
370 }
371}
372
373// const draftCutoff = 5
374
// coreType identifies one of the core JSON Schema types.
// Its values are used as indexes into per-type tables.
type coreType int

// The core types, numbered consecutively from zero.
const (
	nullType coreType = iota
	boolType
	numType
	stringType
	arrayType
	objectType

	// numCoreTypes is the number of core types defined above.
	numCoreTypes
)
387
388var coreToCUE = []cue.Kind{
389 nullType: cue.NullKind,
390 boolType: cue.BoolKind,
391 numType: cue.NumberKind, // Note: both int and float.
392 stringType: cue.StringKind,
393 arrayType: cue.ListKind,
394 objectType: cue.StructKind,
395}
396
397func kindToAST(k cue.Kind, explicitOpen bool) ast.Expr {
398 switch k {
399 case cue.NullKind:
400 // TODO: handle OpenAPI restrictions.
401 return ast.NewNull()
402 case cue.BoolKind:
403 return ast.NewIdent("bool")
404 case cue.NumberKind:
405 return ast.NewIdent("number")
406 case cue.IntKind:
407 return ast.NewIdent("int")
408 case cue.FloatKind:
409 return ast.NewIdent("float")
410 case cue.StringKind:
411 return ast.NewIdent("string")
412 case cue.ListKind:
413 return ast.NewList(&ast.Ellipsis{})
414 case cue.StructKind:
415 if explicitOpen {
416 return ast.NewStruct()
417 }
418 return ast.NewStruct(&ast.Ellipsis{})
419 }
420 panic(fmt.Errorf("unexpected kind %v", k))
421}
422
423var coreTypeName = []string{
424 nullType: "null",
425 boolType: "bool",
426 numType: "number",
427 stringType: "string",
428 arrayType: "array",
429 objectType: "object",
430}
431
432type constraintInfo struct {
433 // typ is an identifier for the root type, if present.
434 // This can be omitted if there are constraints.
435 typ ast.Expr
436 constraints []ast.Expr
437}
438
439func (c *constraintInfo) setTypeUsed(n cue.Value, t coreType, explicitOpen bool) {
440 c.typ = kindToAST(coreToCUE[t], explicitOpen)
441 setPos(c.typ, n)
442 ast.SetRelPos(c.typ, token.NoRelPos)
443}
444
445func (c *constraintInfo) add(n cue.Value, x ast.Expr) {
446 if !isTop(x) {
447 setPos(x, n)
448 ast.SetRelPos(x, token.NoRelPos)
449 c.constraints = append(c.constraints, x)
450 }
451}
452
453func (s *state) add(n cue.Value, t coreType, x ast.Expr) {
454 s.types[t].add(n, x)
455}
456
457func (s *state) setTypeUsed(n cue.Value, t coreType) {
458 if int(t) >= len(s.types) {
459 panic(fmt.Errorf("type out of range %v/%v", int(t), len(s.types)))
460 }
461 s.types[t].setTypeUsed(n, t, s.cfg.OpenOnlyWhenExplicit)
462}
463
464type state struct {
465 *decoder
466 schemaInfo
467
468 up *state
469
470 pos cue.Value
471
472 // The constraints in types represent disjunctions per type.
473 types [numCoreTypes]constraintInfo
474 all constraintInfo // values and oneOf etc.
475 nullable *ast.BasicLit // nullable
476
477 exclusiveMin bool // For OpenAPI and legacy support.
478 exclusiveMax bool // For OpenAPI and legacy support.
479
480 // isRoot holds whether this state is at the root
481 // of the schema.
482 isRoot bool
483
484 minContains *uint64
485 maxContains *uint64
486
487 ifConstraint cue.Value
488 thenConstraint cue.Value
489 elseConstraint cue.Value
490
491 definitions []ast.Decl
492
493 // Used for inserting definitions, properties, etc.
494 obj *ast.StructLit
495 objN cue.Value // used for adding obj to constraints
496
497 patterns []ast.Expr
498
499 list *ast.ListLit
500
501 // listItemsIsArray keeps track of whether the
502 // value of the "items" keyword is an array.
503 // Without this, we can't distinguish between
504 //
505 // "items": true
506 //
507 // and
508 //
509 // "items": []
510 listItemsIsArray bool
511
512 // The following fields are used when the version is
513 // [VersionKubernetesCRD] to check that "properties" and
514 // "additionalProperties" may not be specified together.
515 hasProperties bool
516 hasAdditionalProperties bool
517
518 // Keep track of whether "items" and "type": "array" have been specified, because
519 // in OpenAPI it's mandatory when "type" is "array".
520 hasItems bool
521 isArray bool
522
523 // Keep track of whether a $ref keyword is present,
524 // because pre-2019-09 schemas ignore sibling keywords
525 // to $ref.
526 hasRefKeyword bool
527
528 // Keep track of whether we're preserving existing fields,
529 // which is preserved recursively by default, and is
530 // reset within properties or additionalProperties.
531 preserveUnknownFields bool
532
533 // k8sResourceKind and k8sAPIVersion record values from the
534 // x-kubernetes-group-version-kind keyword
535 // for the kind and apiVersion properties respectively.
536 k8sResourceKind string
537 k8sAPIVersion string
538
539 // Keep track of whether the object has been explicitly
540 // closed or opened (see [Config.OpenOnlyWhenExplicit]).
541 openness openness
542}
543
// openness describes whether, and how, an object schema has been
// opened or closed.
type openness int

const (
	// implicitlyOpen is the zero value: nothing was stated explicitly.
	implicitlyOpen openness = iota
	// explicitlyOpen means explicitly opened,
	// e.g. additionalProperties: true
	explicitlyOpen
	// explicitlyClosed means explicitly closed,
	// e.g. additionalProperties: false
	explicitlyClosed
	// allFieldsCovered means a complete pattern is present,
	// e.g. additionalProperties: type: string
	allFieldsCovered
)
552
553// schemaInfo holds information about a schema
554// after it has been created.
555type schemaInfo struct {
556 // allowedTypes holds the set of types that
557 // this node is allowed to be.
558 allowedTypes cue.Kind
559
560 // knownTypes holds the set of types that this node
561 // is known to be one of by virtue of the constraints inside
562 // all. This is used to avoid adding redundant elements
563 // to the disjunction created by [state.finalize].
564 knownTypes cue.Kind
565
566 title string
567 description string
568
569 // id holds the absolute URI of the schema if has a $id field .
570 // It's the base URI for $ref or nested $id fields.
571 id *url.URL
572 deprecated bool
573
574 schemaVersion Version
575 schemaVersionPresent bool
576
577 hasConstraints bool
578}
579
580func (s *state) idTag() *ast.Attribute {
581 return &ast.Attribute{Text: fmt.Sprintf("@jsonschema(id=%q)", s.id)}
582}
583
584func (s *state) object(n cue.Value) *ast.StructLit {
585 if s.obj == nil {
586 s.obj = &ast.StructLit{}
587 s.objN = n
588 }
589 return s.obj
590}
591
592func (s *state) finalizeObject() {
593 if s.obj == nil && s.schemaVersion == VersionKubernetesCRD && (s.allowedTypes&cue.StructKind) != 0 && s.preserveUnknownFields {
594 // When x-kubernetes-preserve-unknown-fields is set, we need
595 // an explicit ellipsis even though kindToAST won't have added
596 // one, so make sure there's an object.
597 _ = s.object(s.pos)
598 }
599 if s.obj == nil {
600 return
601 }
602 if s.preserveUnknownFields {
603 s.openness = explicitlyOpen
604 }
605 var e ast.Expr = s.obj
606 if s.cfg.OpenOnlyWhenExplicit && s.openness == implicitlyOpen {
607 // Nothing to do: the struct is implicitly open but
608 // we've been directed to leave it like that.
609 } else if s.openness == allFieldsCovered {
610 // Nothing to do: there is a pattern constraint that covers all
611 // possible fields.
612 } else if s.openness == explicitlyClosed {
613 e = ast.NewCall(ast.NewIdent("close"), s.obj)
614 } else {
615 s.obj.Elts = append(s.obj.Elts, &ast.Ellipsis{})
616 }
617 s.add(s.objN, objectType, e)
618}
619
620func (s *state) hasConstraints() bool {
621 if len(s.all.constraints) > 0 {
622 return true
623 }
624 for _, t := range s.types {
625 if len(t.constraints) > 0 {
626 return true
627 }
628 }
629 return len(s.patterns) > 0 ||
630 s.title != "" ||
631 s.description != "" ||
632 s.obj != nil ||
633 s.id != nil
634}
635
636const allTypes = cue.BoolKind |
637 cue.ListKind |
638 cue.NullKind |
639 cue.NumberKind |
640 cue.IntKind |
641 cue.StringKind |
642 cue.StructKind
643
644// finalize constructs CUE syntax from the collected constraints.
645func (s *state) finalize() (e ast.Expr) {
646 if s.allowedTypes == 0 {
647 // Nothing is possible. This isn't a necessarily a problem, as
648 // we might be inside an allOf or oneOf with other valid constraints.
649 return errorDisallowed()
650 }
651
652 s.finalizeObject()
653
654 conjuncts := []ast.Expr{}
655 disjuncts := []ast.Expr{}
656
657 // Sort literal structs and list last for nicer formatting.
658 // Use a stable sort so that the relative order of constraints
659 // is otherwise kept as-is, for the sake of deterministic output.
660 slices.SortStableFunc(s.types[arrayType].constraints, func(a, b ast.Expr) int {
661 _, aList := a.(*ast.ListLit)
662 _, bList := b.(*ast.ListLit)
663 return cmpBool(aList, bList)
664 })
665 slices.SortStableFunc(s.types[objectType].constraints, func(a, b ast.Expr) int {
666 _, aStruct := a.(*ast.StructLit)
667 _, bStruct := b.(*ast.StructLit)
668 return cmpBool(aStruct, bStruct)
669 })
670
671 type excludeInfo struct {
672 pos token.Pos
673 typIndex int
674 }
675 var excluded []excludeInfo
676
677 needsTypeDisjunction := s.allowedTypes != s.knownTypes
678 if !needsTypeDisjunction {
679 for i, t := range s.types {
680 k := coreToCUE[i]
681 if len(t.constraints) > 0 && s.allowedTypes&k != 0 {
682 // We need to include at least one type-specific
683 // constraint in the disjunction.
684 needsTypeDisjunction = true
685 break
686 }
687 }
688 }
689
690 if needsTypeDisjunction {
691 npossible := 0
692 nexcluded := 0
693 for i, t := range s.types {
694 k := coreToCUE[i]
695 allowed := s.allowedTypes&k != 0
696 switch {
697 case len(t.constraints) > 0:
698 npossible++
699 if !allowed {
700 nexcluded++
701 for _, c := range t.constraints {
702 excluded = append(excluded, excludeInfo{c.Pos(), i})
703 }
704 continue
705 }
706 x := ast.NewBinExpr(token.AND, t.constraints...)
707 disjuncts = append(disjuncts, x)
708 case allowed:
709 npossible++
710 if s.knownTypes&k != 0 {
711 disjuncts = append(disjuncts, kindToAST(k, s.cfg.OpenOnlyWhenExplicit))
712 }
713 }
714 }
715 if nexcluded == npossible {
716 // All possibilities have been excluded: this is an impossible
717 // schema.
718 for _, e := range excluded {
719 s.addErr(errors.Newf(e.pos,
720 "constraint not allowed because type %s is excluded",
721 coreTypeName[e.typIndex],
722 ))
723 }
724 }
725 }
726 conjuncts = append(conjuncts, s.all.constraints...)
727
728 if len(disjuncts) > 0 {
729 conjuncts = append(conjuncts, ast.NewBinExpr(token.OR, disjuncts...))
730 }
731
732 if len(conjuncts) == 0 {
733 // There are no conjuncts, which can only happen when there
734 // are no disjuncts, which can only happen when the entire
735 // set of disjuncts is redundant with respect to the types
736 // already implied by s.all. As we've already checked that
737 // s.allowedTypes is non-zero (so we know that
738 // it's not bottom) and we need _some_ expression
739 // to be part of the subequent syntax, we use top.
740 e = top()
741 } else {
742 e = ast.NewBinExpr(token.AND, conjuncts...)
743 }
744
745 a := []ast.Expr{e}
746 if s.nullable != nil {
747 a = []ast.Expr{s.nullable, e}
748 }
749
750 e = ast.NewBinExpr(token.OR, a...)
751
752 if len(s.definitions) > 0 {
753 if st, ok := e.(*ast.StructLit); ok {
754 st.Elts = append(st.Elts, s.definitions...)
755 } else {
756 st = ast.NewStruct()
757 st.Elts = append(st.Elts, &ast.EmbedDecl{Expr: e})
758 st.Elts = append(st.Elts, s.definitions...)
759 e = st
760 }
761 }
762
763 // If an "$id" exists, make sure it's present in the output.
764 if s.id != nil {
765 if st, ok := e.(*ast.StructLit); ok {
766 st.Elts = append([]ast.Decl{s.idTag()}, st.Elts...)
767 } else {
768 e = &ast.StructLit{Elts: []ast.Decl{s.idTag(), &ast.EmbedDecl{Expr: e}}}
769 }
770 }
771
772 // Now that we've expressed the schema as actual syntax,
773 // all the allowed types are actually explicit and will not
774 // need to be mentioned again.
775 s.knownTypes = s.allowedTypes
776 return e
777}
778
// cmpBool compares two booleans, ordering false before true.
// The result is
//
//	-1 if x is less than y,
//	 0 if x equals y,
//	+1 if x is greater than y.
func cmpBool(x, y bool) int {
	if x == y {
		return 0
	}
	if y {
		// x is false and y is true.
		return -1
	}
	return +1
}
796
797func (s schemaInfo) comment() *ast.CommentGroup {
798 // Create documentation.
799 doc := strings.TrimSpace(s.title)
800 if s.description != "" {
801 if doc != "" {
802 doc += "\n\n"
803 }
804 doc += s.description
805 doc = strings.TrimSpace(doc)
806 }
807 // TODO: add examples as well?
808 if doc == "" {
809 return nil
810 }
811 return internal.NewComment(true, doc)
812}
813
814func (s *state) schema(n cue.Value) ast.Expr {
815 expr, _ := s.schemaState(n, allTypes, nil)
816 return expr
817}
818
819// schemaState returns a new state value derived from s.
820// n holds the JSONSchema node to translate to a schema.
821// types holds the set of possible types that the value can hold.
822//
823// If init is not nil, it is called on the newly created state value
824// before doing anything else.
825func (s0 *state) schemaState(n cue.Value, types cue.Kind, init func(*state)) (expr ast.Expr, info schemaInfo) {
826 s := &state{
827 up: s0,
828 schemaInfo: schemaInfo{
829 schemaVersion: s0.schemaVersion,
830 allowedTypes: types,
831 knownTypes: allTypes,
832 },
833 decoder: s0.decoder,
834 pos: n,
835 isRoot: s0.isRoot && n == s0.pos,
836 preserveUnknownFields: s0.preserveUnknownFields,
837 }
838 if init != nil {
839 init(s)
840 }
841 defer func() {
842 // Perhaps replace the schema expression with a reference.
843 expr = s.maybeDefine(expr, info)
844 }()
845 if n.Kind() == cue.BoolKind {
846 if s.schemaVersion.is(vfrom(VersionDraft6)) {
847 // From draft6 onwards, boolean values signify a schema that always passes or fails.
848 // TODO if false, set s.allowedTypes and s.knownTypes to zero?
849 return boolSchema(s.boolValue(n)), s.schemaInfo
850 }
851 return s.errf(n, "boolean schemas not supported in %v", s.schemaVersion), s.schemaInfo
852 }
853 if n.Kind() != cue.StructKind {
854 return s.errf(n, "schema expects mapping node, found %s", n.Kind()), s.schemaInfo
855 }
856
857 // do multiple passes over the constraints to ensure they are done in order.
858 for pass := 0; pass < numPhases; pass++ {
859 s.processMap(n, func(key string, value cue.Value) {
860 if pass == 0 && key == "$ref" {
861 // Before 2019-19, keywords alongside $ref are ignored so keep
862 // track of whether we've seen any non-$ref keywords so we can
863 // ignore those keywords. This could apply even when the schema
864 // is >=2019-19 because $schema could be used to change the version.
865 s.hasRefKeyword = true
866 }
867 // Convert each constraint into a either a value or a functor.
868 c := constraintMap[key]
869 if c == nil {
870 if strings.HasPrefix(key, "x-") {
871 // A keyword starting with a leading x- is clearly
872 // not intended to be a valid keyword, and is explicitly
873 // allowed by OpenAPI. It seems reasonable that
874 // this is not an error even with StrictKeywords enabled.
875 return
876 }
877 if pass == 0 && s.cfg.StrictKeywords {
878 // TODO: value is not the correct position, albeit close. Fix this.
879 s.warnUnrecognizedKeyword(key, value, "unknown keyword %q", key)
880 }
881 return
882 }
883 if c.phase != pass {
884 return
885 }
886 if !s.schemaVersion.is(c.versions) {
887 s.warnUnrecognizedKeyword(key, value, "keyword %q is not supported in JSON schema version %v", key, s.schemaVersion)
888 return
889 }
890 if pass > 0 && !s.schemaVersion.is(vfrom(VersionDraft2019_09)) && s.hasRefKeyword && key != "$ref" {
891 // We're using a schema version that ignores keywords alongside $ref.
892 //
893 // Note that we specifically exclude pass 0 (the pass in which $schema is checked)
894 // from this check, because hasRefKeyword is only set in pass 0 and we
895 // can get into a self-contradictory situation ($schema says we should
896 // ignore keywords alongside $ref, but $ref says we should ignore the $schema
897 // keyword itself). We could make that situation an explicit error, but other
898 // implementations don't, and it would require an entire extra pass just to do so.
899 s.warnUnrecognizedKeyword(key, value, "ignoring keyword %q alongside $ref", key)
900 return
901 }
902 c.fn(key, value, s)
903 })
904 if s.schemaVersion == VersionKubernetesCRD && s.isRoot {
905 // The root of a CRD is always a resource, so treat it as if it contained
906 // the x-kubernetes-embedded-resource keyword
907 // TODO remove this behavior now that we have an explicit
908 // ExtractCRDs function which does a better job at doing this.
909 c := constraintMap["x-kubernetes-embedded-resource"]
910 if c.phase != pass {
911 continue
912 }
913 // Note: there is no field value for the embedded-resource keyword,
914 // but it's not actually used except for its position so passing
915 // the parent object should work fine.
916 c.fn("x-kubernetes-embedded-resource", n, s)
917 }
918 }
919 if s.id != nil {
920 // If there's an ID, it can be referred to.
921 s.ensureDefinition(s.pos)
922 }
923 constraintIfThenElse(s)
924 if s.schemaVersion == VersionKubernetesCRD {
925 if s.hasProperties && s.hasAdditionalProperties {
926 s.errf(n, "additionalProperties may not be combined with properties in %v", s.schemaVersion)
927 }
928 }
929 if s.schemaVersion.is(openAPILike) {
930 if s.isArray && !s.hasItems {
931 // From https://github.com/OAI/OpenAPI-Specification/blob/3.0.0/versions/3.0.0.md#schema-object
932 // "`items` MUST be present if the `type` is `array`."
933 s.errf(n, `"items" must be present when the "type" is "array" in %v`, s.schemaVersion)
934 }
935 }
936
937 schemaExpr := s.finalize()
938 s.schemaInfo.hasConstraints = s.hasConstraints()
939 return schemaExpr, s.schemaInfo
940}
941
942func (s *state) warnUnrecognizedKeyword(key string, n cue.Value, msg string, args ...any) {
943 if !s.cfg.StrictKeywords {
944 return
945 }
946 if s.schemaVersion.is(openAPILike) && strings.HasPrefix(key, "x-") {
947 // Unimplemented x- keywords are allowed even with strict keywords
948 // under OpenAPI-like versions, because those versions enable
949 // strict keywords by default.
950 return
951 }
952 s.errf(n, msg, args...)
953}
954
955// maybeDefine checks whether we might need a definition
956// for n given its actual schema syntax expression. If
957// it does, it creates the definition as appropriate and returns
958// an expression that refers to that definition; if not,
959// it just returns expr itself.
960// TODO also report whether the schema has been defined at a place
961// where it can be unified with something else?
962func (s *state) maybeDefine(expr ast.Expr, info schemaInfo) ast.Expr {
963 def := s.definedSchemaForNode(s.pos)
964 if def == nil || len(def.path.Selectors()) == 0 {
965 return expr
966 }
967 def.schema = expr
968 def.comment = info.comment()
969 if def.importPath == "" {
970 // It's a local definition that's not at the root.
971 if !s.builder.put(def.path, expr, s.comment()) {
972 s.errf(s.pos, "redefinition of schema CUE path %v", def.path)
973 return expr
974 }
975 }
976 return s.refExpr(s.pos, def.importPath, def.path)
977}
978
979// definedSchemaForNode returns the definedSchema value
980// for the given node in the JSON schema, or nil
981// if the node does not need a definition.
982func (s *state) definedSchemaForNode(n cue.Value) *definedSchema {
983 def, ok := s.defForValue.lookup(n)
984 if !ok {
985 return nil
986 }
987 if def != nil {
988 // We've either made a definition in a previous pass
989 // or it's a redefinition.
990 // TODO if it's a redefinition, error.
991 return def
992 }
993 // This node has been referred to but not actually defined. We'll
994 // need another pass to sort out the reference even though the
995 // reference is no longer dangling.
996 s.needAnotherPass = true
997
998 def = s.addDefinition(n)
999 if def == nil {
1000 return nil
1001 }
1002 s.defForValue.set(n, def)
1003 s.danglingRefs--
1004 return def
1005}
1006
1007func (s *state) addDefinition(n cue.Value) *definedSchema {
1008 var loc SchemaLoc
1009 schemaRoot := s.schemaRoot()
1010 loc.ID = ref(*schemaRoot.id)
1011 loc.ID.Fragment = mustCUEPathToJSONPointer(relPath(n, schemaRoot.pos))
1012 idStr := loc.ID.String()
1013 def, ok := s.defs[idStr]
1014 if ok {
1015 // We've already got a definition for this ID.
1016 // TODO if it's been defined in the same pass, then it's a redefinition
1017 // s.errf(n, "redefinition of schema %s at %v", idStr, n.Path())
1018 return def
1019 }
1020 loc.IsLocal = true
1021 loc.Path = relPath(n, s.root)
1022 importPath, path, err := s.cfg.MapRef(loc)
1023 if err != nil {
1024 s.errf(n, "cannot get reference for %v: %v", loc, err)
1025 return nil
1026 }
1027 def = &definedSchema{
1028 importPath: importPath,
1029 path: path,
1030 }
1031 s.defs[idStr] = def
1032 return def
1033}
1034
1035// refExpr returns a CUE expression to refer to the given path within the given
1036// imported CUE package. If importPath is empty, it returns a reference
1037// relative to the root of the schema being generated.
1038func (s *state) refExpr(n cue.Value, importPath string, path cue.Path) ast.Expr {
1039 if importPath == "" {
1040 // Internal reference
1041 expr, err := s.builder.getRef(path)
1042 if err != nil {
1043 s.errf(n, "cannot generate reference: %v", err)
1044 return nil
1045 }
1046 return expr
1047 }
1048 // External reference
1049 ip := ast.ParseImportPath(importPath)
1050 if ip.Qualifier == "" {
1051 // TODO choose an arbitrary name here.
1052 s.errf(n, "cannot determine package name from import path %q", importPath)
1053 return nil
1054 }
1055 ident := ast.NewIdent(ip.Qualifier)
1056 ident.Node = &ast.ImportSpec{Path: ast.NewString(importPath)}
1057 expr, err := pathRefSyntax(path, ident)
1058 if err != nil {
1059 s.errf(n, "cannot determine CUE path: %v", err)
1060 return nil
1061 }
1062 return expr
1063}
1064
1065func (s *state) constValue(n cue.Value) ast.Expr {
1066 k := n.Kind()
1067 switch k {
1068 case cue.ListKind:
1069 a := []ast.Expr{}
1070 for i, _ := n.List(); i.Next(); {
1071 a = append(a, s.constValue(i.Value()))
1072 }
1073 return setPos(ast.NewList(a...), n)
1074
1075 case cue.StructKind:
1076 a := []ast.Decl{}
1077 s.processMap(n, func(key string, n cue.Value) {
1078 a = append(a, &ast.Field{
1079 Label: ast.NewString(key),
1080 Value: s.constValue(n),
1081 Constraint: token.NOT,
1082 })
1083 })
1084 return setPos(ast.NewCall(ast.NewIdent("close"), &ast.StructLit{Elts: a}), n)
1085 default:
1086 if !n.IsConcrete() {
1087 s.errf(n, "invalid non-concrete value")
1088 }
1089 return n.Syntax(cue.Final()).(ast.Expr)
1090 }
1091}
1092
1093// processMap processes a yaml node, expanding merges.
1094//
1095// TODO: in some cases we can translate merges into CUE embeddings.
1096// This may also prevent exponential blow-up (as may happen when
1097// converting YAML to JSON).
1098func (s *state) processMap(n cue.Value, f func(key string, n cue.Value)) {
1099 // TODO: intercept references to allow for optimized performance.
1100 for i, _ := n.Fields(); i.Next(); {
1101 f(i.Selector().Unquoted(), i.Value())
1102 }
1103}
1104
1105func (s *state) listItems(name string, n cue.Value, allowEmpty bool) (a []cue.Value) {
1106 if n.Kind() != cue.ListKind {
1107 s.errf(n, `value of %q must be an array, found %v`, name, n.Kind())
1108 }
1109 for i, _ := n.List(); i.Next(); {
1110 a = append(a, i.Value())
1111 }
1112 if !allowEmpty && len(a) == 0 {
1113 s.errf(n, `array for %q must be non-empty`, name)
1114 }
1115 return a
1116}
1117
1118// excludeFields returns either an empty slice (if decls is empty)
1119// or a slice containing a CUE expression that can be used to exclude the
1120// fields of the given declaration in a label expression. For instance, for
1121//
1122// { foo: 1, bar: int }
1123//
1124// it creates a slice holding the expression
1125//
1126// !~ "^(foo|bar)$"
1127//
1128// which can be used in a label expression to define types for all fields but
1129// those existing:
1130//
1131// [!~"^(foo|bar)$"]: string
1132func excludeFields(decls []ast.Decl) []ast.Expr {
1133 if len(decls) == 0 {
1134 return nil
1135 }
1136 var buf strings.Builder
1137 first := true
1138 buf.WriteString("^(")
1139 for _, d := range decls {
1140 f, ok := d.(*ast.Field)
1141 if !ok {
1142 continue
1143 }
1144 str, _, _ := ast.LabelName(f.Label)
1145 if str != "" {
1146 if !first {
1147 buf.WriteByte('|')
1148 }
1149 buf.WriteString(regexp.QuoteMeta(str))
1150 first = false
1151 }
1152 }
1153 buf.WriteString(")$")
1154 return []ast.Expr{
1155 &ast.UnaryExpr{Op: token.NMAT, X: ast.NewString(buf.String())},
1156 }
1157}
1158
1159func errorDisallowed() ast.Expr {
1160 return ast.NewCall(ast.NewIdent("error"), ast.NewString("disallowed"))
1161}
1162
1163func isErrorCall(e ast.Expr) bool {
1164 call, ok := e.(*ast.CallExpr)
1165 if !ok {
1166 return false
1167 }
1168 target, ok := call.Fun.(*ast.Ident)
1169 if !ok {
1170 return false
1171 }
1172 return target.Name == "error"
1173}
1174
1175func top() ast.Expr {
1176 return ast.NewIdent("_")
1177}
1178
1179func boolSchema(ok bool) ast.Expr {
1180 if ok {
1181 return top()
1182 }
1183 return errorDisallowed()
1184}
1185
1186func isTop(s ast.Expr) bool {
1187 i, ok := s.(*ast.Ident)
1188 return ok && i.Name == "_"
1189}
1190
1191func addTag(field ast.Label, tag, value string) *ast.Field {
1192 return &ast.Field{
1193 Label: field,
1194 Value: top(),
1195 Attrs: []*ast.Attribute{
1196 {Text: fmt.Sprintf("@%s(%s)", tag, value)},
1197 },
1198 }
1199}
1200
1201func setPos(e ast.Expr, v cue.Value) ast.Expr {
1202 ast.SetPos(e, v.Pos())
1203 return e
1204}
1205
1206// uint64Value is like v.Uint64 except that it
1207// also allows floating point constants, as long
1208// as they have no fractional part.
1209func uint64Value(v cue.Value) (uint64, error) {
1210 n, err := v.Uint64()
1211 if err == nil {
1212 return n, nil
1213 }
1214 f, err := v.Float64()
1215 if err != nil {
1216 return 0, err
1217 }
1218 intPart, fracPart := math.Modf(f)
1219 if fracPart != 0 {
1220 return 0, errors.Newf(v.Pos(), "%v is not a whole number", v)
1221 }
1222 if intPart < 0 || intPart > math.MaxUint64 {
1223 return 0, errors.Newf(v.Pos(), "%v is out of bounds", v)
1224 }
1225 return uint64(intPart), nil
1226}