this repo has no description
at master 511 lines 14 kB view raw
1// Copyright 2020 CUE Authors 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15// TODO: make this package public in cuelang.org/go/encoding 16// once stabilized. 17 18package encoding 19 20import ( 21 "fmt" 22 "io" 23 "maps" 24 25 "cuelang.org/go/cue" 26 "cuelang.org/go/cue/ast" 27 "cuelang.org/go/cue/build" 28 "cuelang.org/go/cue/errors" 29 "cuelang.org/go/cue/format" 30 "cuelang.org/go/cue/literal" 31 "cuelang.org/go/cue/parser" 32 "cuelang.org/go/cue/token" 33 "cuelang.org/go/encoding/json" 34 "cuelang.org/go/encoding/jsonschema" 35 "cuelang.org/go/encoding/openapi" 36 "cuelang.org/go/encoding/protobuf" 37 "cuelang.org/go/encoding/protobuf/jsonpb" 38 "cuelang.org/go/encoding/protobuf/textproto" 39 "cuelang.org/go/encoding/toml" 40 "cuelang.org/go/encoding/xml/koala" 41 "cuelang.org/go/internal" 42 "cuelang.org/go/internal/encoding/yaml" 43 "cuelang.org/go/internal/filetypes" 44 "cuelang.org/go/internal/source" 45 "golang.org/x/text/encoding/unicode" 46 "golang.org/x/text/transform" 47) 48 49type Decoder struct { 50 ctx *cue.Context 51 cfg *Config 52 closer io.Closer 53 next func() (ast.Expr, error) 54 rewriteFunc rewriteFunc 55 interpretFunc interpretFunc 56 interpretation build.Interpretation 57 expr ast.Expr 58 file *ast.File 59 filename string // may change on iteration for some formats 60 index int 61 size int // length of the source file if known; -1 otherwise 62 err error 63} 64 65type interpretFunc func(cue.Value) (file *ast.File, err error) 66type rewriteFunc func(*ast.File) (file *ast.File, err error) 67 68func (i *Decoder) Filename() string { return i.filename } 69 70// Interpretation returns the current interpretation detected by Detect. 71func (i *Decoder) Interpretation() build.Interpretation { 72 return i.interpretation 73} 74func (i *Decoder) Index() int { return i.index } 75func (i *Decoder) Done() bool { return i.err != nil } 76 77func (i *Decoder) Next() { 78 if i.err != nil { 79 return 80 } 81 // Decoder level 82 i.file = nil 83 i.expr, i.err = i.next() 84 i.index++ 85 if i.err != nil { 86 return 87 } 88 i.doInterpret() 89} 90 91func (i *Decoder) doInterpret() { 92 if i.rewriteFunc != nil { 93 i.file = i.File() 94 var err error 95 i.file, err = i.rewriteFunc(i.file) 96 if err != nil { 97 i.err = err 98 return 99 } 100 } 101 if i.interpretFunc != nil { 102 i.file = i.File() 103 v := i.ctx.BuildFile(i.file) 104 if err := v.Err(); err != nil { 105 i.err = err 106 return 107 } 108 i.file, i.err = i.interpretFunc(v) 109 } 110} 111 112func (i *Decoder) File() *ast.File { 113 if i.file != nil { 114 return i.file 115 } 116 return internal.ToFile(i.expr) 117} 118 119func (i *Decoder) Err() error { 120 if i.err == io.EOF { 121 return nil 122 } 123 return i.err 124} 125 126func (i *Decoder) Close() { 127 if i.closer != nil { 128 i.closer.Close() 129 } 130} 131 132type Config struct { 133 Mode filetypes.Mode 134 135 // Out specifies an overwrite destination. 136 Out io.Writer 137 Stdin io.Reader 138 Stdout io.Writer 139 140 PkgName string // package name for files to generate 141 142 Force bool // overwrite existing files 143 Stream bool // potentially write more than one document per file 144 AllErrors bool 145 146 Schema cue.Value // used for schema-based decoding 147 148 EscapeHTML bool 149 InlineImports bool // expand references to non-core imports 150 ProtoPath []string 151 Format []format.Option 152 ParserConfig parser.Config 153 ParseFile func(name string, src interface{}, cfg parser.Config) (*ast.File, error) 154} 155 156// NewDecoder returns a stream of non-rooted data expressions. The encoding 157// type of f must be a data type, but does not have to be an encoding that 158// can stream. stdin is used in case the file is "-". 159// 160// This may change the contents of f. 161func NewDecoder(ctx *cue.Context, f *build.File, cfg *Config) *Decoder { 162 if cfg == nil { 163 cfg = &Config{} 164 } 165 if !cfg.ParserConfig.IsValid() { 166 // Avoid mutating cfg. 167 cfg.ParserConfig = parser.NewConfig(parser.ParseComments) 168 } 169 i := &Decoder{filename: f.Filename, ctx: ctx, cfg: cfg} 170 i.next = func() (ast.Expr, error) { 171 if i.err != nil { 172 return nil, i.err 173 } 174 return nil, io.EOF 175 } 176 177 if file, ok := f.Source.(*ast.File); ok { 178 i.file = file 179 i.validate(file, f) 180 return i 181 } 182 183 var r io.Reader 184 if f.Source == nil && f.Filename == "-" { 185 // TODO: should we allow this? 186 r = cfg.Stdin 187 i.size = -1 188 } else { 189 r, i.size, i.err = source.Open(f.Filename, f.Source) 190 if c, ok := r.(io.Closer); ok { 191 i.closer = c 192 } 193 if i.err != nil { 194 return i 195 } 196 } 197 198 switch f.Interpretation { 199 case "": 200 case build.Auto: 201 openAPI := openAPIFunc(cfg, f) 202 jsonSchema := jsonSchemaFunc(cfg, f) 203 i.interpretFunc = func(v cue.Value) (file *ast.File, err error) { 204 205 switch i.interpretation = Detect(v); i.interpretation { 206 case build.JSONSchema: 207 return jsonSchema(v) 208 case build.OpenAPI: 209 return openAPI(v) 210 } 211 return i.file, i.err 212 } 213 case build.OpenAPI: 214 i.interpretation = build.OpenAPI 215 i.interpretFunc = openAPIFunc(cfg, f) 216 case build.JSONSchema: 217 i.interpretation = build.JSONSchema 218 i.interpretFunc = jsonSchemaFunc(cfg, f) 219 case build.ProtobufJSON: 220 i.interpretation = build.ProtobufJSON 221 i.rewriteFunc = protobufJSONFunc(cfg, f) 222 default: 223 i.err = fmt.Errorf("unsupported interpretation %q", f.Interpretation) 224 } 225 226 // Binary encodings should not be treated as UTF-8, so read directly from the file. 227 // Other encodings are interepted as UTF-8 with an optional BOM prefix. 228 // 229 // TODO: perhaps each encoding could have a "binary" boolean attribute 230 // so that we can use that here rather than hard-coding which encodings are binary. 231 // In the near future, others like [build.BinaryProto] should also be treated as binary. 232 if f.Encoding != build.Binary { 233 // TODO: this code also allows UTF16, which is too permissive for some 234 // encodings. Switch to unicode.UTF8Sig once available. 235 t := unicode.BOMOverride(unicode.UTF8.NewDecoder()) 236 r = transform.NewReader(r, t) 237 } 238 239 path := f.Filename 240 switch f.Encoding { 241 case build.CUE: 242 b, err := source.ReadAllSize(r, i.size) 243 if err != nil { 244 i.err = err 245 break 246 } 247 if cfg.ParseFile == nil { 248 i.file, i.err = parser.ParseFile(path, b, cfg.ParserConfig) 249 } else { 250 i.file, i.err = cfg.ParseFile(path, b, cfg.ParserConfig) 251 } 252 i.validate(i.file, f) 253 if i.err == nil { 254 i.doInterpret() 255 } 256 case build.JSON: 257 b, err := source.ReadAllSize(r, i.size) 258 if err != nil { 259 i.err = err 260 break 261 } 262 i.expr, i.err = json.Extract(path, b) 263 if i.err == nil { 264 i.doInterpret() 265 } 266 case build.JSONL: 267 i.next = json.NewDecoder(nil, path, r).Extract 268 i.Next() 269 case build.YAML: 270 b, err := source.ReadAllSize(r, i.size) 271 i.err = err 272 i.next = yaml.NewDecoder(path, b).Decode 273 i.Next() 274 case build.TOML: 275 i.next = toml.NewDecoder(path, r).Decode 276 i.Next() 277 case build.XML: 278 switch { 279 case f.BoolTags["koala"]: 280 i.next = koala.NewDecoder(path, r).Decode 281 i.Next() 282 default: 283 i.err = fmt.Errorf("xml requires a variant, such as: xml+koala") 284 } 285 case build.Text: 286 b, err := source.ReadAllSize(r, i.size) 287 i.err = err 288 i.expr = ast.NewString(string(b)) 289 case build.Binary: 290 b, err := source.ReadAllSize(r, i.size) 291 i.err = err 292 s := literal.Bytes.WithTabIndent(1).Quote(string(b)) 293 i.expr = ast.NewLit(token.STRING, s) 294 case build.Protobuf: 295 paths := &protobuf.Config{ 296 Paths: cfg.ProtoPath, 297 PkgName: cfg.PkgName, 298 } 299 i.file, i.err = protobuf.Extract(path, r, paths) 300 case build.TextProto: 301 b, err := source.ReadAllSize(r, i.size) 302 i.err = err 303 if err == nil { 304 d := textproto.NewDecoder() 305 i.expr, i.err = d.Parse(cfg.Schema, path, b) 306 } 307 default: 308 i.err = fmt.Errorf("unsupported encoding %q", f.Encoding) 309 } 310 311 return i 312} 313 314func jsonSchemaFunc(cfg *Config, f *build.File) interpretFunc { 315 return func(v cue.Value) (file *ast.File, err error) { 316 tags := boolTagsForFile(f, build.JSONSchema) 317 cfg := &jsonschema.Config{ 318 PkgName: cfg.PkgName, 319 320 // Note: we don't populate Strict because then we'd 321 // be ignoring the values of the other tags when it's true, 322 // and there's (deliberately) nothing that Strict does that 323 // cannot be described by the other two keywords. 324 // The strictKeywords and strictFeatures tags are 325 // set by internal/filetypes from the strict tag when appropriate. 326 327 StrictKeywords: tags["strictKeywords"], 328 StrictFeatures: tags["strictFeatures"], 329 OpenOnlyWhenExplicit: tags["openOnlyWhenExplicit"], 330 } 331 file, err = jsonschema.Extract(v, cfg) 332 // TODO: simplify currently erases file line info. Reintroduce after fix. 333 // file, err = simplify(file, err) 334 return file, err 335 } 336} 337 338func openAPIFunc(c *Config, f *build.File) interpretFunc { 339 return func(v cue.Value) (file *ast.File, err error) { 340 tags := boolTagsForFile(f, build.JSONSchema) 341 file, err = openapi.Extract(v, &openapi.Config{ 342 PkgName: c.PkgName, 343 344 // Note: don't populate Strict (see more detailed 345 // comment in jsonSchemaFunc) 346 347 StrictKeywords: tags["strictKeywords"], 348 StrictFeatures: tags["strictFeatures"], 349 }) 350 // TODO: simplify currently erases file line info. Reintroduce after fix. 351 // file, err = simplify(file, err) 352 return file, err 353 } 354} 355 356func protobufJSONFunc(cfg *Config, file *build.File) rewriteFunc { 357 return func(f *ast.File) (*ast.File, error) { 358 if !cfg.Schema.Exists() { 359 return f, errors.Newf(token.NoPos, 360 "no schema specified for protobuf interpretation.") 361 } 362 return f, jsonpb.NewDecoder(cfg.Schema).RewriteFile(f) 363 } 364} 365 366func boolTagsForFile(f *build.File, interp build.Interpretation) map[string]bool { 367 if f.Interpretation != build.Auto { 368 return f.BoolTags 369 } 370 defaultTags := filetypes.DefaultTagsForInterpretation(interp, filetypes.Input) 371 if len(defaultTags) == 0 { 372 return f.BoolTags 373 } 374 // We _could_ probably mutate f.Tags directly, but that doesn't 375 // seem quite right as it's been passed in from outside of internal/encoding. 376 // So go the extra mile and make a new map. 377 378 // Set values for tags that have a default value but aren't 379 // present in f.Tags. 380 var tags map[string]bool 381 for tag, val := range defaultTags { 382 if _, ok := f.BoolTags[tag]; ok { 383 continue 384 } 385 if tags == nil { 386 tags = make(map[string]bool) 387 } 388 tags[tag] = val 389 } 390 if tags == nil { 391 return f.BoolTags 392 } 393 maps.Copy(tags, f.BoolTags) 394 return tags 395} 396 397func shouldValidate(i *filetypes.FileInfo) bool { 398 // TODO: We ignore attributes for now. They should be enabled by default. 399 return false || 400 !i.Definitions || 401 !i.Data || 402 !i.Optional || 403 !i.Constraints || 404 !i.References || 405 !i.Cycles || 406 !i.KeepDefaults || 407 !i.Incomplete || 408 !i.Imports || 409 !i.Docs 410} 411 412type validator struct { 413 allErrors bool 414 count int 415 errs errors.Error 416 fileinfo *filetypes.FileInfo 417} 418 419func (d *Decoder) validate(f *ast.File, b *build.File) { 420 if d.err != nil { 421 return 422 } 423 fi, err := filetypes.FromFile(b, filetypes.Input) 424 if err != nil { 425 d.err = err 426 return 427 } 428 if !shouldValidate(fi) { 429 return 430 } 431 432 v := validator{fileinfo: fi, allErrors: d.cfg.AllErrors} 433 ast.Walk(f, v.validate, nil) 434 d.err = v.errs 435} 436 437func (v *validator) validate(n ast.Node) bool { 438 if v.count > 10 { 439 return false 440 } 441 442 i := v.fileinfo 443 444 // TODO: Cycles 445 446 ok := true 447 check := func(n ast.Node, option bool, s string, cond bool) { 448 if !option && cond { 449 v.errs = errors.Append(v.errs, errors.Newf(n.Pos(), 450 "%s not allowed in %s mode", s, v.fileinfo.Form)) 451 v.count++ 452 ok = false 453 } 454 } 455 456 // For now we don't make any distinction between these modes. 457 458 constraints := i.Constraints && i.Incomplete && i.Optional && i.References 459 460 check(n, i.Docs, "comments", len(ast.Comments(n)) > 0) 461 462 switch x := n.(type) { 463 case *ast.CommentGroup: 464 check(n, i.Docs, "comments", len(ast.Comments(n)) > 0) 465 return false 466 467 case *ast.ImportDecl, *ast.ImportSpec: 468 check(n, i.Imports, "imports", true) 469 470 case *ast.Field: 471 check(n, i.Definitions, "definitions", internal.IsDefinition(x.Label)) 472 check(n, i.Data, "regular fields", internal.IsRegularField(x)) 473 474 _, _, err := ast.LabelName(x.Label) 475 check(n, constraints, "optional fields", err != nil) 476 477 check(n, i.Attributes, "attributes", len(x.Attrs) > 0) 478 ast.Walk(x.Value, v.validate, nil) 479 return false 480 481 case *ast.UnaryExpr: 482 switch x.Op { 483 case token.MUL: 484 check(n, i.KeepDefaults, "default values", true) 485 case token.SUB, token.ADD: 486 // The parser represents negative numbers as an unary expression. 487 // Allow one `-` or `+`. 488 _, ok := x.X.(*ast.BasicLit) 489 check(n, constraints, "expressions", !ok) 490 case token.LSS, token.LEQ, token.EQL, token.GEQ, token.GTR, 491 token.NEQ, token.NMAT, token.MAT: 492 check(n, constraints, "constraints", true) 493 default: 494 check(n, constraints, "expressions", true) 495 } 496 497 case *ast.BinaryExpr, *ast.ParenExpr, *ast.IndexExpr, *ast.SliceExpr, 498 *ast.CallExpr, *ast.Comprehension, *ast.Interpolation: 499 check(n, constraints, "expressions", true) 500 501 case *ast.Ellipsis: 502 check(n, constraints, "ellipsis", true) 503 504 case *ast.Ident, *ast.SelectorExpr, *ast.Alias, *ast.LetClause: 505 check(n, i.References, "references", true) 506 507 default: 508 // Other types are either always okay or handled elsewhere. 509 } 510 return ok 511}