1// Copyright 2023 CUE Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// Package modfile provides functionality for reading and parsing
16// the CUE module file, cue.mod/module.cue.
17//
18// WARNING: THIS PACKAGE IS EXPERIMENTAL.
19// ITS API MAY CHANGE AT ANY TIME.
20package modfile
21
22import (
23 _ "embed"
24 "fmt"
25 "strings"
26 "sync"
27
28 "cuelang.org/go/cue"
29 "cuelang.org/go/cue/ast"
30 "cuelang.org/go/cue/build"
31 "cuelang.org/go/cue/cuecontext"
32 "cuelang.org/go/cue/errors"
33 "cuelang.org/go/cue/format"
34 "cuelang.org/go/cue/token"
35 "cuelang.org/go/internal/cueversion"
36 "cuelang.org/go/internal/encoding"
37 "cuelang.org/go/internal/filetypes"
38 "cuelang.org/go/internal/mod/modfiledata"
39 "cuelang.org/go/internal/mod/semver"
40)
41
42//go:embed schema.cue
43var moduleSchemaData string
44
45const schemaFile = "cuelang.org/go/mod/modfile/schema.cue"
46
47type (
48 // File represents the contents of a cue.mod/module.cue file.
49 File = modfiledata.File
50
51 // Source represents how to transform from a module's
52 // source to its actual contents.
53 Source = modfiledata.Source
54
55 Language = modfiledata.Language
56 Dep = modfiledata.Dep
57)
58
// baseFileVersion is decoded from a module file before anything else:
// it extracts only the declared language version, which then determines
// how the rest of the file is decoded.
type baseFileVersion struct {
	// Language mirrors the top-level "language" field.
	Language struct {
		// Version is the declared language version; it is left
		// empty when the file does not declare one.
		Version string `json:"version"`
	} `json:"language"`
}
66
67// Format returns a formatted representation of f
68// in CUE syntax.
69func Format(f *File) ([]byte, error) {
70 if len(f.Deps) == 0 && f.Deps != nil {
71 // There's no way to get the CUE encoder to omit an empty
72 // but non-nil slice (despite the current doc comment on
73 // [cue.Context.Encode], so make a copy of f to allow us
74 // to do that.
75 f1 := *f
76 f1.Deps = nil
77 f = &f1
78 }
79 // TODO this could be better:
80 // - it should omit the outer braces
81 v := cuecontext.New().Encode(f)
82 if err := v.Validate(cue.Concrete(true)); err != nil {
83 return nil, err
84 }
85 n := v.Syntax(cue.Concrete(true)).(*ast.StructLit)
86
87 data, err := format.Node(&ast.File{
88 Decls: n.Elts,
89 })
90 if err != nil {
91 return nil, fmt.Errorf("cannot format: %v", err)
92 }
93 // Sanity check that it can be parsed.
94 // TODO this could be more efficient by checking all the file fields
95 // before formatting the output.
96 _, actualSchemaVersion, err := parse(data, "-", false)
97 if err != nil {
98 return nil, fmt.Errorf("cannot parse result: %v", strings.TrimSuffix(errors.Details(err, nil), "\n"))
99 }
100 if f.Language != nil && actualSchemaVersion == "v0.0.0" {
101 // It's not a legacy module file (because the language field is present)
102 // but we've used the legacy schema to parse it, which means that
103 // it's almost certainly a bogus version because all versions
104 // we care about fail when there are unknown fields, but the
105 // original schema allowed all fields.
106 return nil, fmt.Errorf("language version %v is too early for module.cue (need at least %v)", f.Language.Version, EarliestClosedSchemaVersion())
107 }
108 return data, err
109}
110
// noDepsFile is the decoding target for module files that are treated
// as legacy files: only the module path is extracted and every other
// field is ignored.
type noDepsFile struct {
	// Module is the value of the top-level "module" field.
	Module string `json:"module"`
}
114
115var (
116 moduleSchemaOnce sync.Once // guards the creation of _moduleSchema
117 // TODO remove this mutex when https://cuelang.org/issue/2733 is fixed.
118 moduleSchemaMutex sync.Mutex // guards any use of _moduleSchema
119 _schemas schemaInfo
120)
121
122type schemaInfo struct {
123 Versions map[string]cue.Value `json:"versions"`
124 EarliestClosedSchemaVersion string `json:"earliestClosedSchemaVersion"`
125}
126
127// moduleSchemaDo runs f with information about all the schema versions
128// present in schema.cue. It does this within a mutex because it is
129// not currently allowed to use cue.Value concurrently.
130// TODO remove the mutex when https://cuelang.org/issue/2733 is fixed.
131func moduleSchemaDo[T any](f func(*schemaInfo) (T, error)) (T, error) {
132 moduleSchemaOnce.Do(func() {
133 // It is important that this cue.Context not be used for building any other cue.Value,
134 // such as in [Parse] or [ParseLegacy].
135 // A value holds memory as long as the context it was built with is kept alive for,
136 // and this context is alive forever via the _schemas global.
137 //
138 // TODO(mvdan): this violates the documented API rules in the cue package:
139 //
140 // Only values created from the same Context can be involved in the same operation.
141 //
142 // However, this appears to work in practice, and all alternatives right now would be
143 // either too costly or awkward. We want to lift that API restriction, and this works OK,
144 // so leave it as-is for the time being.
145 ctx := cuecontext.New()
146 schemav := ctx.CompileString(moduleSchemaData, cue.Filename(schemaFile))
147 if err := schemav.Decode(&_schemas); err != nil {
148 panic(fmt.Errorf("internal error: invalid CUE module.cue schema: %v", errors.Details(err, nil)))
149 }
150 })
151 moduleSchemaMutex.Lock()
152 defer moduleSchemaMutex.Unlock()
153 return f(&_schemas)
154}
155
156func lookup(v cue.Value, sels ...cue.Selector) cue.Value {
157 return v.LookupPath(cue.MakePath(sels...))
158}
159
160// EarliestClosedSchemaVersion returns the earliest module.cue schema version
161// that excludes unknown fields. Any version declared in a module.cue file
162// should be at least this, because that's when we added the language.version
163// field itself.
164func EarliestClosedSchemaVersion() string {
165 return earliestClosedSchemaVersion()
166}
167
168var earliestClosedSchemaVersion = sync.OnceValue(func() string {
169 earliest, _ := moduleSchemaDo(func(info *schemaInfo) (string, error) {
170 earliest := ""
171 for v := range info.Versions {
172 if earliest == "" || semver.Compare(v, earliest) < 0 {
173 earliest = v
174 }
175 }
176 return earliest, nil
177 })
178 return earliest
179})
180
181// Parse verifies that the module file has correct syntax
182// and follows the schema following the required language.version field.
183// The file name is used for error messages.
184// All dependencies must be specified correctly: with major
185// versions in the module paths and canonical dependency versions.
186func Parse(modfile []byte, filename string) (*File, error) {
187 f, _, err := parse(modfile, filename, true)
188 return f, err
189}
190
191// ParseLegacy parses the legacy version of the module file
192// that only supports the single field "module" and ignores all other
193// fields.
194func ParseLegacy(modfile []byte, filename string) (*File, error) {
195 ctx := cuecontext.New()
196 file, err := parseDataOnlyCUE(ctx, modfile, filename)
197 if err != nil {
198 return nil, errors.Wrapf(err, token.NoPos, "invalid module file syntax")
199 }
200 // Unfortunately we need a new context. See the note inside [moduleSchemaDo].
201 v := ctx.BuildFile(file)
202 if err := v.Err(); err != nil {
203 return nil, errors.Wrapf(err, token.NoPos, "invalid module file")
204 }
205 var f noDepsFile
206 if err := v.Decode(&f); err != nil {
207 return nil, newCUEError(err, filename)
208 }
209 return &File{
210 Module: f.Module,
211 }, nil
212}
213
214// ParseNonStrict is like [Parse] but allows some laxity in the parsing:
215// - if a module path lacks a version, it's taken from the version.
216// - if a non-canonical version is used, it will be canonicalized.
217//
218// The file name is used for error messages.
219func ParseNonStrict(modfile []byte, filename string) (*File, error) {
220 file, _, err := parse(modfile, filename, false)
221 return file, err
222}
223
224// FixLegacy converts a legacy module.cue file as parsed by [ParseLegacy]
225// into a format suitable for parsing with [Parse]. It adds a language.version
226// field and moves all unrecognized fields into custom.legacy.
227//
228// If there is no module field or it is empty, it is set to "test.example".
229//
230// If the file already parses OK with [ParseNonStrict], it returns the
231// result of that.
232func FixLegacy(modfile []byte, filename string) (*File, error) {
233 f, err := ParseNonStrict(modfile, filename)
234 if err == nil {
235 // It parses OK so it doesn't need fixing.
236 return f, nil
237 }
238 ctx := cuecontext.New()
239 file, err := parseDataOnlyCUE(ctx, modfile, filename)
240 if err != nil {
241 return nil, errors.Wrapf(err, token.NoPos, "invalid module file syntax")
242 }
243 v := ctx.BuildFile(file)
244 if err := v.Validate(cue.Concrete(true)); err != nil {
245 return nil, errors.Wrapf(err, token.NoPos, "invalid module file value")
246 }
247 var allFields map[string]any
248 if err := v.Decode(&allFields); err != nil {
249 return nil, err
250 }
251 mpath := "test.example"
252 if m, ok := allFields["module"]; ok {
253 if mpath1, ok := m.(string); ok && mpath1 != "" {
254 mpath = mpath1
255 } else if !ok {
256 return nil, fmt.Errorf("module field has unexpected type %T", m)
257 }
258 // TODO decide what to do if the module path isn't OK according to the new rules.
259 }
260 customLegacy := make(map[string]any)
261 for k, v := range allFields {
262 if k != "module" {
263 customLegacy[k] = v
264 }
265 }
266 var custom map[string]map[string]any
267 if len(customLegacy) > 0 {
268 custom = map[string]map[string]any{
269 "legacy": customLegacy,
270 }
271 }
272 f = &File{
273 Module: mpath,
274 Language: &Language{
275 // If there's a legacy module file, the CUE code
276 // is unlikely to be using new language features,
277 // so keep the language version fixed rather than
278 // using [cueversion.LanguageVersion].
279 // See https://cuelang.org/issue/3222.
280 Version: "v0.9.0",
281 },
282 Custom: custom,
283 }
284 // Round-trip through [Parse] so that we get exactly the same
285 // result as a later parse of the same data will. This also
286 // adds a major version to the module path if needed.
287 data, err := Format(f)
288 if err != nil {
289 return nil, fmt.Errorf("cannot format fixed file: %v", err)
290 }
291 f, err = ParseNonStrict(data, "fixed-"+filename)
292 if err != nil {
293 return nil, fmt.Errorf("cannot parse resulting module file %q: %v", data, err)
294 }
295 return f, nil
296}
297
298func parse(modfile []byte, filename string, strict bool) (file *File, actualSchemaVersion string, err error) {
299 // Unfortunately we need a new context. See the note inside [moduleSchemaDo].
300 ctx := cuecontext.New()
301 astFile, err := parseDataOnlyCUE(ctx, modfile, filename)
302 if err != nil {
303 return nil, "", errors.Wrapf(err, token.NoPos, "invalid module file syntax")
304 }
305
306 v := ctx.BuildFile(astFile)
307 if err := v.Validate(cue.Concrete(true)); err != nil {
308 return nil, "", errors.Wrapf(err, token.NoPos, "invalid module file value")
309 }
310 // First determine the declared version of the module file.
311 var base baseFileVersion
312 if err := v.Decode(&base); err != nil {
313 return nil, "", errors.Wrapf(err, token.NoPos, "cannot determine language version")
314 }
315 if base.Language.Version == "" {
316 return nil, "", ErrNoLanguageVersion
317 }
318 if !semver.IsValid(base.Language.Version) {
319 return nil, "", fmt.Errorf("language version %q in module.cue is not valid semantic version", base.Language.Version)
320 }
321 if mv, lv := base.Language.Version, cueversion.LanguageVersion(); semver.Compare(mv, lv) > 0 {
322 return nil, "", fmt.Errorf("language version %q declared in module.cue is too new for current language version %q", mv, lv)
323 }
324 type result struct {
325 file *File
326 actualSchemaVersion string
327 }
328 r, err := moduleSchemaDo(func(schemas *schemaInfo) (result, error) {
329 // Now that we're happy we're within bounds, find the latest
330 // schema that applies to the declared version.
331 latest := ""
332 var latestSchema cue.Value
333 for vers, schema := range schemas.Versions {
334 if semver.Compare(vers, base.Language.Version) > 0 {
335 continue
336 }
337 if latest == "" || semver.Compare(vers, latest) > 0 {
338 latest = vers
339 latestSchema = schema
340 }
341 }
342 if latest == "" {
343 // Should never happen, because there should always
344 // be some applicable schema.
345 return result{}, fmt.Errorf("cannot find schema suitable for reading module file with language version %q", base.Language.Version)
346 }
347 schema := latestSchema
348 v = v.Unify(lookup(schema, cue.Def("#File")))
349 if err := v.Validate(); err != nil {
350 return result{}, newCUEError(err, filename)
351 }
352 if latest == "v0.0.0" {
353 // The chosen schema is the earliest schema which allowed
354 // all fields. We don't actually want a module.cue file with
355 // an old version to treat those fields as special, so don't try
356 // to decode into *File because that will do so.
357 // This mirrors the behavior of [ParseLegacy].
358 var f noDepsFile
359 if err := v.Decode(&f); err != nil {
360 return result{}, newCUEError(err, filename)
361 }
362 return result{
363 file: &File{
364 Module: f.Module,
365 },
366 actualSchemaVersion: latest,
367 }, nil
368 }
369 var mf File
370 if err := v.Decode(&mf); err != nil {
371 return result{}, errors.Wrapf(err, token.NoPos, "internal error: cannot decode into modFile struct")
372 }
373 return result{
374 file: &mf,
375 actualSchemaVersion: latest,
376 }, nil
377 })
378 if err != nil {
379 return nil, "", err
380 }
381 if strict {
382 err = r.file.Init()
383 } else {
384 err = r.file.InitNonStrict()
385 }
386 if err != nil {
387 return nil, "", fmt.Errorf("invalid module file %s: %v", filename, err)
388 }
389 return r.file, r.actualSchemaVersion, nil
390}
391
// ErrNoLanguageVersion is the error returned by [Parse] and
// [ParseNonStrict] when a cue.mod/module.cue file does not declare
// the `language.version` field.
var ErrNoLanguageVersion error = fmt.Errorf("no language version declared in module.cue")
395
396func parseDataOnlyCUE(ctx *cue.Context, cueData []byte, filename string) (*ast.File, error) {
397 dec := encoding.NewDecoder(ctx, &build.File{
398 Filename: filename,
399 Encoding: build.CUE,
400 Interpretation: build.Auto,
401 Form: build.Data,
402 Source: cueData,
403 }, &encoding.Config{
404 Mode: filetypes.Export,
405 AllErrors: true,
406 })
407 if err := dec.Err(); err != nil {
408 return nil, err
409 }
410 return dec.File(), nil
411}
412
413func newCUEError(err error, filename string) error {
414 ps := errors.Positions(err)
415 for _, p := range ps {
416 if errStr := findErrorComment(p); errStr != "" {
417 return fmt.Errorf("invalid module file: %s", errStr)
418 }
419 }
420 // TODO we have more potential to improve error messages here.
421 return err
422}
423
424// findErrorComment finds an error comment in the form
425//
426// //error: ...
427//
428// before the given position.
429// This works as a kind of poor-man's error primitive
430// so we can customize the error strings when verification
431// fails.
432func findErrorComment(p token.Pos) string {
433 if p.Filename() != schemaFile {
434 return ""
435 }
436 off := p.Offset()
437 source := moduleSchemaData
438 if off > len(source) {
439 return ""
440 }
441 source, _, ok := cutLast(source[:off], "\n")
442 if !ok {
443 return ""
444 }
445 _, errorLine, ok := cutLast(source, "\n")
446 if !ok {
447 return ""
448 }
449 errStr, ok := strings.CutPrefix(errorLine, "//error: ")
450 if !ok {
451 return ""
452 }
453 return errStr
454}
455
// cutLast slices s around the last instance of sep, returning the text
// before and after it. If sep does not occur in s, it returns
// "", s, false.
func cutLast(s, sep string) (before, after string, found bool) {
	idx := strings.LastIndex(s, sep)
	if idx < 0 {
		return "", s, false
	}
	return s[:idx], s[idx+len(sep):], true
}