// Copyright 2020 CUE Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// TODO: make this package public in cuelang.org/go/encoding
// once stabilized.

package encoding

import (
	"fmt"
	"io"
	"maps"

	"cuelang.org/go/cue"
	"cuelang.org/go/cue/ast"
	"cuelang.org/go/cue/build"
	"cuelang.org/go/cue/errors"
	"cuelang.org/go/cue/format"
	"cuelang.org/go/cue/literal"
	"cuelang.org/go/cue/parser"
	"cuelang.org/go/cue/token"
	"cuelang.org/go/encoding/json"
	"cuelang.org/go/encoding/jsonschema"
	"cuelang.org/go/encoding/openapi"
	"cuelang.org/go/encoding/protobuf"
	"cuelang.org/go/encoding/protobuf/jsonpb"
	"cuelang.org/go/encoding/protobuf/textproto"
	"cuelang.org/go/encoding/toml"
	"cuelang.org/go/encoding/xml/koala"
	"cuelang.org/go/internal"
	"cuelang.org/go/internal/encoding/yaml"
	"cuelang.org/go/internal/filetypes"
	"cuelang.org/go/internal/source"
	"golang.org/x/text/encoding/unicode"
	"golang.org/x/text/transform"
)

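// A Decoder converts the contents of a single build.File into a stream of
// CUE syntax trees. Callers iterate with Next until Done reports true, then
// check Err; Close releases the underlying source if it is closable.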
type Decoder struct {
	ctx            *cue.Context
	cfg            *Config
	closer         io.Closer
	next           func() (ast.Expr, error)
	rewriteFunc    rewriteFunc
	interpretFunc  interpretFunc
	interpretation build.Interpretation
	expr           ast.Expr
	file           *ast.File
	filename       string // may change on iteration for some formats
	index          int
	size           int // length of the source file if known; -1 otherwise
	err            error
}

type interpretFunc func(cue.Value) (file *ast.File, err error)
type rewriteFunc func(*ast.File) (file *ast.File, err error)

func (i *Decoder) Filename() string { return i.filename }

// Interpretation returns the current interpretation detected by Detect.
func (i *Decoder) Interpretation() build.Interpretation {
	return i.interpretation
}
func (i *Decoder) Index() int { return i.index }
func (i *Decoder) Done() bool { return i.err != nil }

func (i *Decoder) Next() {
	if i.err != nil {
		return
	}
	// Decoder level
	i.file = nil
	i.expr, i.err = i.next()
	i.index++
	if i.err != nil {
		return
	}
	i.doInterpret()
}

func (i *Decoder) doInterpret() {
	if i.rewriteFunc != nil {
		i.file = i.File()
		var err error
		i.file, err = i.rewriteFunc(i.file)
		if err != nil {
			i.err = err
			return
		}
	}
	if i.interpretFunc != nil {
		i.file = i.File()
		v := i.ctx.BuildFile(i.file)
		if err := v.Err(); err != nil {
			i.err = err
			return
		}
		i.file, i.err = i.interpretFunc(v)
	}
}

func (i *Decoder) File() *ast.File {
	if i.file != nil {
		return i.file
	}
	return internal.ToFile(i.expr)
}

func (i *Decoder) Err() error {
	if i.err == io.EOF {
		return nil
	}
	return i.err
}

func (i *Decoder) Close() {
	if i.closer != nil {
		i.closer.Close()
	}
}

type Config struct {
	Mode filetypes.Mode

	// Out specifies an overwrite destination.
	Out    io.Writer
	Stdin  io.Reader
	Stdout io.Writer

	PkgName string // package name for files to generate

	Force     bool // overwrite existing files
	Stream    bool // potentially write more than one document per file
	AllErrors bool

	Schema cue.Value // used for schema-based decoding

	EscapeHTML    bool
	InlineImports bool // expand references to non-core imports
	ProtoPath     []string
	Format        []format.Option
	ParserConfig  parser.Config
	ParseFile     func(name string, src interface{}, cfg parser.Config) (*ast.File, error)
}

// NewDecoder returns a stream of non-rooted data expressions. The encoding
// type of f must be a data type, but does not have to be an encoding that
// can stream. cfg.Stdin is used in case the file is "-".
//
// This may change the contents of f.
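//
// A typical iteration over the resulting Decoder looks roughly like this
// (a sketch; ctx, f, and cfg are assumed to have been prepared by the
// caller):
//
//	d := NewDecoder(ctx, f, cfg)
//	defer d.Close()
//	for ; !d.Done(); d.Next() {
//		file := d.File() // current syntax tree
//		_ = file         // process file
//	}
//	if err := d.Err(); err != nil {
//		// handle error; io.EOF is already filtered out by Err
//	}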
func NewDecoder(ctx *cue.Context, f *build.File, cfg *Config) *Decoder {
	if cfg == nil {
		cfg = &Config{}
	}
	if !cfg.ParserConfig.IsValid() {
		// Avoid mutating cfg.
		cfg.ParserConfig = parser.NewConfig(parser.ParseComments)
	}
	i := &Decoder{filename: f.Filename, ctx: ctx, cfg: cfg}
	i.next = func() (ast.Expr, error) {
		if i.err != nil {
			return nil, i.err
		}
		return nil, io.EOF
	}

	if file, ok := f.Source.(*ast.File); ok {
		i.file = file
		i.validate(file, f)
		return i
	}

	var r io.Reader
	if f.Source == nil && f.Filename == "-" {
		// TODO: should we allow this?
		r = cfg.Stdin
		i.size = -1
	} else {
		r, i.size, i.err = source.Open(f.Filename, f.Source)
		if c, ok := r.(io.Closer); ok {
			i.closer = c
		}
		if i.err != nil {
			return i
		}
	}

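	// An interpretation adds a second decoding step: the decoded data is
	// rebuilt as a cue.Value and handed to interpretFunc (or, for
	// ProtobufJSON, the syntax tree is rewritten in place). build.Auto
	// defers the choice to Detect, which inspects the built value.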
	switch f.Interpretation {
	case "":
	case build.Auto:
		openAPI := openAPIFunc(cfg, f)
		jsonSchema := jsonSchemaFunc(cfg, f)
		i.interpretFunc = func(v cue.Value) (file *ast.File, err error) {
			switch i.interpretation = Detect(v); i.interpretation {
			case build.JSONSchema:
				return jsonSchema(v)
			case build.OpenAPI:
				return openAPI(v)
			}
			return i.file, i.err
		}
	case build.OpenAPI:
		i.interpretation = build.OpenAPI
		i.interpretFunc = openAPIFunc(cfg, f)
	case build.JSONSchema:
		i.interpretation = build.JSONSchema
		i.interpretFunc = jsonSchemaFunc(cfg, f)
	case build.ProtobufJSON:
		i.interpretation = build.ProtobufJSON
		i.rewriteFunc = protobufJSONFunc(cfg, f)
	default:
		i.err = fmt.Errorf("unsupported interpretation %q", f.Interpretation)
	}

	// Binary encodings should not be treated as UTF-8, so read directly from the file.
	// Other encodings are interpreted as UTF-8 with an optional BOM prefix.
	//
	// TODO: perhaps each encoding could have a "binary" boolean attribute
	// so that we can use that here rather than hard-coding which encodings are binary.
	// In the near future, others like [build.BinaryProto] should also be treated as binary.
	if f.Encoding != build.Binary {
		// TODO: this code also allows UTF16, which is too permissive for some
		// encodings. Switch to unicode.UTF8Sig once available.
		t := unicode.BOMOverride(unicode.UTF8.NewDecoder())
		r = transform.NewReader(r, t)
	}

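	// Each case below populates exactly one of three things: i.file for
	// formats decoded as a whole file, i.expr for single expressions, or
	// i.next for formats that can stream multiple documents per file.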
	path := f.Filename
	switch f.Encoding {
	case build.CUE:
		b, err := source.ReadAllSize(r, i.size)
		if err != nil {
			i.err = err
			break
		}
		if cfg.ParseFile == nil {
			i.file, i.err = parser.ParseFile(path, b, cfg.ParserConfig)
		} else {
			i.file, i.err = cfg.ParseFile(path, b, cfg.ParserConfig)
		}
		i.validate(i.file, f)
		if i.err == nil {
			i.doInterpret()
		}
	case build.JSON:
		b, err := source.ReadAllSize(r, i.size)
		if err != nil {
			i.err = err
			break
		}
		i.expr, i.err = json.Extract(path, b)
		if i.err == nil {
			i.doInterpret()
		}
	case build.JSONL:
		i.next = json.NewDecoder(nil, path, r).Extract
		i.Next()
	case build.YAML:
		b, err := source.ReadAllSize(r, i.size)
		i.err = err
		i.next = yaml.NewDecoder(path, b).Decode
		i.Next()
	case build.TOML:
		i.next = toml.NewDecoder(path, r).Decode
		i.Next()
	case build.XML:
		switch {
		case f.BoolTags["koala"]:
			i.next = koala.NewDecoder(path, r).Decode
			i.Next()
		default:
			i.err = fmt.Errorf("xml requires a variant, such as: xml+koala")
		}
	case build.Text:
		b, err := source.ReadAllSize(r, i.size)
		i.err = err
		i.expr = ast.NewString(string(b))
	case build.Binary:
		b, err := source.ReadAllSize(r, i.size)
		i.err = err
		s := literal.Bytes.WithTabIndent(1).Quote(string(b))
		i.expr = ast.NewLit(token.STRING, s)
	case build.Protobuf:
		paths := &protobuf.Config{
			Paths:   cfg.ProtoPath,
			PkgName: cfg.PkgName,
		}
		i.file, i.err = protobuf.Extract(path, r, paths)
	case build.TextProto:
		b, err := source.ReadAllSize(r, i.size)
		i.err = err
		if err == nil {
			d := textproto.NewDecoder()
			i.expr, i.err = d.Parse(cfg.Schema, path, b)
		}
	default:
		i.err = fmt.Errorf("unsupported encoding %q", f.Encoding)
	}

	return i
}

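// jsonSchemaFunc returns an interpretFunc that extracts CUE from a value
// holding a JSON Schema document, honoring the relevant boolean file tags.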
func jsonSchemaFunc(cfg *Config, f *build.File) interpretFunc {
	return func(v cue.Value) (file *ast.File, err error) {
		tags := boolTagsForFile(f, build.JSONSchema)
		cfg := &jsonschema.Config{
			PkgName: cfg.PkgName,

			// Note: we don't populate Strict because then we'd
			// be ignoring the values of the other tags when it's true,
			// and there's (deliberately) nothing that Strict does that
			// cannot be described by the other two keywords.
			// The strictKeywords and strictFeatures tags are
			// set by internal/filetypes from the strict tag when appropriate.

			StrictKeywords:       tags["strictKeywords"],
			StrictFeatures:       tags["strictFeatures"],
			OpenOnlyWhenExplicit: tags["openOnlyWhenExplicit"],
		}
		file, err = jsonschema.Extract(v, cfg)
		// TODO: simplify currently erases file line info. Reintroduce after fix.
		// file, err = simplify(file, err)
		return file, err
	}
}

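// openAPIFunc returns an interpretFunc that extracts CUE definitions from a
// value holding an OpenAPI document.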
func openAPIFunc(c *Config, f *build.File) interpretFunc {
	return func(v cue.Value) (file *ast.File, err error) {
		tags := boolTagsForFile(f, build.JSONSchema)
		file, err = openapi.Extract(v, &openapi.Config{
			PkgName: c.PkgName,

			// Note: don't populate Strict (see more detailed
			// comment in jsonSchemaFunc)

			StrictKeywords: tags["strictKeywords"],
			StrictFeatures: tags["strictFeatures"],
		})
		// TODO: simplify currently erases file line info. Reintroduce after fix.
		// file, err = simplify(file, err)
		return file, err
	}
}

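// protobufJSONFunc returns a rewriteFunc that reinterprets an already decoded
// JSON file according to the Protobuf JSON mapping, using cfg.Schema as the
// message schema. It fails if no schema was provided.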
func protobufJSONFunc(cfg *Config, file *build.File) rewriteFunc {
	return func(f *ast.File) (*ast.File, error) {
		if !cfg.Schema.Exists() {
			return f, errors.Newf(token.NoPos,
				"no schema specified for protobuf interpretation.")
		}
		return f, jsonpb.NewDecoder(cfg.Schema).RewriteFile(f)
	}
}

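// boolTagsForFile returns the boolean tags in effect for f when it is
// interpreted as interp. For the build.Auto interpretation, tags that are not
// set explicitly on the file fall back to the defaults registered for interp;
// explicit tags always win. For illustration (with made-up values), if the
// defaults were {"strictKeywords": true} and f.BoolTags were
// {"strictFeatures": false}, the result would contain both entries.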
func boolTagsForFile(f *build.File, interp build.Interpretation) map[string]bool {
	if f.Interpretation != build.Auto {
		return f.BoolTags
	}
	defaultTags := filetypes.DefaultTagsForInterpretation(interp, filetypes.Input)
	if len(defaultTags) == 0 {
		return f.BoolTags
	}
	// We _could_ probably mutate f.BoolTags directly, but that doesn't
	// seem quite right as it's been passed in from outside of internal/encoding.
	// So go the extra mile and make a new map.

	// Set values for tags that have a default value but aren't
	// present in f.BoolTags.
	var tags map[string]bool
	for tag, val := range defaultTags {
		if _, ok := f.BoolTags[tag]; ok {
			continue
		}
		if tags == nil {
			tags = make(map[string]bool)
		}
		tags[tag] = val
	}
	if tags == nil {
		return f.BoolTags
	}
	maps.Copy(tags, f.BoolTags)
	return tags
}

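// shouldValidate reports whether the form described by i restricts the syntax
// that may appear in a file, in which case the AST needs to be walked and
// checked.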
func shouldValidate(i *filetypes.FileInfo) bool {
	// TODO: We ignore attributes for now. They should be enabled by default.
	return false ||
		!i.Definitions ||
		!i.Data ||
		!i.Optional ||
		!i.Constraints ||
		!i.References ||
		!i.Cycles ||
		!i.KeepDefaults ||
		!i.Incomplete ||
		!i.Imports ||
		!i.Docs
}

type validator struct {
	allErrors bool
	count     int
	errs      errors.Error
	fileinfo  *filetypes.FileInfo
}

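// validate checks that f only uses syntax permitted by the file type
// described by b, recording any violations in d.err.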
func (d *Decoder) validate(f *ast.File, b *build.File) {
	if d.err != nil {
		return
	}
	fi, err := filetypes.FromFile(b, filetypes.Input)
	if err != nil {
		d.err = err
		return
	}
	if !shouldValidate(fi) {
		return
	}

	v := validator{fileinfo: fi, allErrors: d.cfg.AllErrors}
	ast.Walk(f, v.validate, nil)
	d.err = v.errs
}

func (v *validator) validate(n ast.Node) bool {
	if v.count > 10 {
		return false
	}

	i := v.fileinfo

	// TODO: Cycles

	ok := true
	check := func(n ast.Node, option bool, s string, cond bool) {
		if !option && cond {
			v.errs = errors.Append(v.errs, errors.Newf(n.Pos(),
				"%s not allowed in %s mode", s, v.fileinfo.Form))
			v.count++
			ok = false
		}
	}

	// For now we don't make any distinction between these modes.

	constraints := i.Constraints && i.Incomplete && i.Optional && i.References

	check(n, i.Docs, "comments", len(ast.Comments(n)) > 0)

	switch x := n.(type) {
	case *ast.CommentGroup:
		check(n, i.Docs, "comments", len(ast.Comments(n)) > 0)
		return false

	case *ast.ImportDecl, *ast.ImportSpec:
		check(n, i.Imports, "imports", true)

	case *ast.Field:
		check(n, i.Definitions, "definitions", internal.IsDefinition(x.Label))
		check(n, i.Data, "regular fields", internal.IsRegularField(x))

		_, _, err := ast.LabelName(x.Label)
		check(n, constraints, "optional fields", err != nil)

		check(n, i.Attributes, "attributes", len(x.Attrs) > 0)
		ast.Walk(x.Value, v.validate, nil)
		return false

	case *ast.UnaryExpr:
		switch x.Op {
		case token.MUL:
			check(n, i.KeepDefaults, "default values", true)
		case token.SUB, token.ADD:
			// The parser represents negative numbers as a unary expression.
			// Allow one `-` or `+`.
			_, ok := x.X.(*ast.BasicLit)
			check(n, constraints, "expressions", !ok)
		case token.LSS, token.LEQ, token.EQL, token.GEQ, token.GTR,
			token.NEQ, token.NMAT, token.MAT:
			check(n, constraints, "constraints", true)
		default:
			check(n, constraints, "expressions", true)
		}

	case *ast.BinaryExpr, *ast.ParenExpr, *ast.IndexExpr, *ast.SliceExpr,
		*ast.CallExpr, *ast.Comprehension, *ast.Interpolation:
		check(n, constraints, "expressions", true)

	case *ast.Ellipsis:
		check(n, constraints, "ellipsis", true)

	case *ast.Ident, *ast.SelectorExpr, *ast.Alias, *ast.LetClause:
		check(n, i.References, "references", true)

	default:
		// Other types are either always okay or handled elsewhere.
	}
	return ok
}