1package module
2
3import (
4 "errors"
5 "fmt"
6 "regexp"
7 "strings"
8 "sync"
9 "unicode"
10 "unicode/utf8"
11
12 "cuelang.org/go/cue/ast"
13 "cuelang.org/go/internal/mod/semver"
14)
15
// Module paths and version tags must be storable in OCI registries, so the
// patterns below follow the grammars given in
// https://github.com/opencontainers/distribution-spec/blob/main/spec.md#pulling-manifests
// Each pattern is compiled on first use and then cached by sync.OnceValue.
var (
	// basePathPat matches a repository name: one or more slash-separated
	// components, each made of runs of lower-case ASCII letters and digits
	// joined by a single separator (".", "_", "__", or one or more "-").
	basePathPat = sync.OnceValue(func() *regexp.Regexp {
		const repoNamePattern = `^[a-z0-9]+((\.|_|__|-+)[a-z0-9]+)*(/[a-z0-9]+((\.|_|__|-+)[a-z0-9]+)*)*$`
		return regexp.MustCompile(repoNamePattern)
	})

	// tagPat matches a tag: an ASCII letter, digit or underscore followed
	// by at most 127 ASCII letters, digits, dots, underscores or dashes.
	tagPat = sync.OnceValue(func() *regexp.Regexp {
		const tagPattern = `^[a-zA-Z0-9_][a-zA-Z0-9._-]{0,127}$`
		return regexp.MustCompile(tagPattern)
	})
)
26
27// Check checks that a given module path, version pair is valid.
28// In addition to the path being a valid module path
29// and the version being a valid semantic version,
30// the two must correspond.
31// For example, the path "foo.com/bar@v2" only corresponds to
32// semantic versions beginning with "v2.".
33func Check(path, version string) error {
34 if err := CheckPath(path); err != nil {
35 return err
36 }
37 if !semver.IsValid(version) {
38 return &ModuleError{
39 Path: path,
40 Err: &InvalidVersionError{Version: version, Err: errors.New("not a semantic version")},
41 }
42 }
43 _, pathMajor, _ := ast.SplitPackageVersion(path)
44 if err := CheckPathMajor(version, pathMajor); err != nil {
45 return &ModuleError{Path: path, Err: err}
46 }
47 return nil
48}
49
// firstPathOK reports whether r may appear in the first element of a
// module path.
//
// The first element is expected to be an LDH (letters, digits, hyphen)
// domain name, so only lower-case ASCII letters, ASCII digits, '-' and '.'
// are accepted. Upper-case letters are rejected to avoid case ambiguity.
func firstPathOK(r rune) bool {
	switch {
	case r == '-', r == '.':
		return true
	case '0' <= r && r <= '9':
		return true
	case 'a' <= r && r <= 'z':
		return true
	}
	return false
}
58
// modPathOK reports whether r may appear in a module path element.
//
// Accepted runes are lower-case ASCII letters, ASCII digits and the
// punctuation characters '-', '.' and '_'. Upper-case letters, '~' and
// all non-ASCII runes are rejected.
func modPathOK(r rune) bool {
	switch {
	case r >= utf8.RuneSelf:
		// Nothing outside ASCII is permitted.
		return false
	case 'a' <= r && r <= 'z', '0' <= r && r <= '9':
		return true
	}
	return r == '-' || r == '.' || r == '_'
}
69
70// importPathOK reports whether r can appear in a package import path element.
71//
72// Import paths are intermediate between module paths and file paths: we allow
73// disallow characters that would be confusing or ambiguous as arguments to
74// 'go get' (such as '@' and ' ' ), but allow certain characters that are
75// otherwise-unambiguous on the command line and historically used for some
76// binary names (such as '++' as a suffix for compiler binaries and wrappers).
77func importPathOK(r rune) bool {
78 return modPathOK(r) ||
79 r == '+' ||
80 r == '~' ||
81 'A' <= r && r <= 'Z'
82}
83
// fileNameOK reports whether r may appear in a file name.
//
// Outside ASCII, every Unicode letter is accepted and nothing else.
// Within ASCII, letters, digits, the space character and a fixed set of
// punctuation are accepted.
//
// Widening the accepted set would make it harder to detect potential
// case-folding and normalization collisions, so do so only with care.
func fileNameOK(r rune) bool {
	if r >= utf8.RuneSelf {
		return unicode.IsLetter(r)
	}
	switch {
	case '0' <= r && r <= '9', 'A' <= r && r <= 'Z', 'a' <= r && r <= 'z':
		return true
	}
	// The full set of ASCII punctuation is:
	//	! " # $ % & ' ( ) * + , - . / : ; < = > ? [ \ ] ^ _ ` { | } ~
	// The shell special characters " ' * < > ? ` | are left out (some of
	// them are rejected by the Windows file system as well), and so are
	// the path separators / : and \ since this function only ever sees
	// characters of a single path element. For example Windows disallows
	// < > \ and macOS disallows :, so those must never be added.
	// The space character (U+0020) is allowed.
	const allowed = "!#$%&()+,-.=@[]^_{}~ "
	return strings.ContainsRune(allowed, r)
}
107
108// CheckPathWithoutVersion is like [CheckPath] except that
109// it expects a module path without a major version.
110func CheckPathWithoutVersion(basePath string) (err error) {
111 if _, _, ok := ast.SplitPackageVersion(basePath); ok {
112 return fmt.Errorf("module path inappropriately contains version")
113 }
114 if err := checkPath(basePath, modulePath); err != nil {
115 return err
116 }
117 firstPath, _, _ := strings.Cut(basePath, "/")
118 if firstPath == "" {
119 return fmt.Errorf("leading slash")
120 }
121 if !strings.Contains(firstPath, ".") {
122 return fmt.Errorf("missing dot in first path element")
123 }
124 if basePath[0] == '-' {
125 return fmt.Errorf("leading dash in first path element")
126 }
127 for _, r := range firstPath {
128 if !firstPathOK(r) {
129 return fmt.Errorf("invalid char %q in first path element", r)
130 }
131 }
132 // Sanity check agreement with OCI specs.
133 if !basePathPat().MatchString(basePath) {
134 return fmt.Errorf("path does not conform to OCI repository name restrictions; see https://github.com/opencontainers/distribution-spec/blob/HEAD/spec.md#pulling-manifests")
135 }
136 return nil
137}
138
139// CheckPath checks that a module path is valid.
140// A valid module path is a valid import path, as checked by CheckImportPath,
141// with three additional constraints.
142//
143// First, the leading path element (up to the first slash, if any),
144// by convention a domain name, must contain only lower-case ASCII letters,
145// ASCII digits, dots (U+002E), and dashes (U+002D);
146// it must contain at least one dot and cannot start with a dash.
147//
148// Second, there may be a final major version of the form
149// @vN where N looks numeric
150// (ASCII digits) and must not begin with a leading zero.
151// Without such a major version, the major version is assumed
152// to be v0.
153//
154// Third, no path element may begin with a dot.
155func CheckPath(mpath string) (err error) {
156 if mpath == "local" {
157 return nil
158 }
159 defer func() {
160 if err != nil {
161 err = &InvalidPathError{Kind: "module", Path: mpath, Err: err}
162 }
163 }()
164
165 basePath, vers, ok := ast.SplitPackageVersion(mpath)
166 if ok {
167 if semver.Major(vers) != vers {
168 return fmt.Errorf("path can contain major version only")
169 }
170 if !tagPat().MatchString(vers) {
171 return fmt.Errorf("non-conforming version %q", vers)
172 }
173 } else {
174 basePath = mpath
175 }
176 if err := CheckPathWithoutVersion(basePath); err != nil {
177 return err
178 }
179 return nil
180}
181
182// CheckImportPath checks that an import path is valid.
183//
184// A valid import path consists of one or more valid path elements
185// separated by slashes (U+002F), optionally followed by
186// an @vN (major version) qualifier.
187// The path part must not begin with nor end in a slash.
188//
189// A valid path element is a non-empty string made up of
190// lower case ASCII letters, ASCII digits, and limited ASCII punctuation: - . and _
191// Punctuation characters may not be adjacent and must be between non-punctuation
192// characters.
193//
194// The element prefix up to the first dot must not be a reserved file name
195// on Windows, regardless of case (CON, com1, NuL, and so on).
196func CheckImportPath(path string) error {
197 parts := ast.ParseImportPath(path)
198 if semver.Major(parts.Version) != parts.Version {
199 return &InvalidPathError{
200 Kind: "import",
201 Path: path,
202 Err: fmt.Errorf("import paths can only contain a major version specifier"),
203 }
204 }
205 if err := checkPath(parts.Path, importPath); err != nil {
206 return &InvalidPathError{Kind: "import", Path: path, Err: err}
207 }
208 return nil
209}
210
// pathKind selects the rule set applied when validating a path.
// Module paths, import paths and file paths each have different
// restrictions.
type pathKind int

const (
	// modulePath selects the rules for module paths.
	modulePath = pathKind(iota)
	// importPath selects the rules for package import paths.
	importPath
	// filePath selects the rules for slash-separated file paths.
	filePath
)
220
221// checkPath checks that a general path is valid. kind indicates what
222// specific constraints should be applied.
223//
224// checkPath returns an error describing why the path is not valid.
225// Because these checks apply to module, import, and file paths,
226// and because other checks may be applied, the caller is expected to wrap
227// this error with InvalidPathError.
228func checkPath(path string, kind pathKind) error {
229 if !utf8.ValidString(path) {
230 return fmt.Errorf("invalid UTF-8")
231 }
232 if path == "" {
233 return fmt.Errorf("empty string")
234 }
235 if path[0] == '-' && kind != filePath {
236 return fmt.Errorf("leading dash")
237 }
238 if strings.Contains(path, "//") {
239 return fmt.Errorf("double slash")
240 }
241 if path[len(path)-1] == '/' {
242 return fmt.Errorf("trailing slash")
243 }
244 elemStart := 0
245 for i, r := range path {
246 if r == '/' {
247 if err := checkElem(path[elemStart:i], kind); err != nil {
248 return err
249 }
250 elemStart = i + 1
251 }
252 }
253 if err := checkElem(path[elemStart:], kind); err != nil {
254 return err
255 }
256 return nil
257}
258
259// checkElem checks whether an individual path element is valid.
260func checkElem(elem string, kind pathKind) error {
261 if elem == "" {
262 return fmt.Errorf("empty path element")
263 }
264 if strings.Count(elem, ".") == len(elem) {
265 return fmt.Errorf("invalid path element %q", elem)
266 }
267
268 if kind == modulePath {
269
270 if r := rune(elem[0]); r == '.' || r == '_' || r == '-' {
271 return fmt.Errorf("leading %q in path element", r)
272 }
273 if r := rune(elem[len(elem)-1]); r == '.' || r == '_' || r == '-' {
274 return fmt.Errorf("trailing %q in path element", r)
275 }
276 } else if elem[len(elem)-1] == '.' {
277 return fmt.Errorf("trailing dot in path element")
278 }
279 for _, r := range elem {
280 ok := false
281 switch kind {
282 case modulePath:
283 ok = modPathOK(r)
284 case importPath:
285 ok = importPathOK(r)
286 case filePath:
287 ok = fileNameOK(r)
288 default:
289 panic(fmt.Sprintf("internal error: invalid kind %v", kind))
290 }
291 if !ok {
292 return fmt.Errorf("invalid char %q", r)
293 }
294 }
295 // Windows disallows a bunch of path elements, sadly.
296 // See https://docs.microsoft.com/en-us/windows/desktop/fileio/naming-a-file
297 short, _, _ := strings.Cut(elem, ".")
298 for _, bad := range badWindowsNames {
299 if strings.EqualFold(bad, short) {
300 return fmt.Errorf("%q disallowed as path element component on Windows", short)
301 }
302 }
303
304 if kind == filePath {
305 // don't check for Windows short-names in file names. They're
306 // only an issue for import paths.
307 return nil
308 }
309
310 // Reject path components that look like Windows short-names.
311 // Those usually end in a tilde followed by one or more ASCII digits.
312 if tilde := strings.LastIndexByte(short, '~'); tilde >= 0 && tilde < len(short)-1 {
313 suffix := short[tilde+1:]
314 suffixIsDigits := true
315 for _, r := range suffix {
316 if r < '0' || r > '9' {
317 suffixIsDigits = false
318 break
319 }
320 }
321 if suffixIsDigits {
322 return fmt.Errorf("trailing tilde and digits in path element")
323 }
324 }
325
326 return nil
327}
328
329// CheckFilePath checks that a slash-separated file path is valid.
330// The definition of a valid file path is the same as the definition
331// of a valid import path except that the set of allowed characters is larger:
332// all Unicode letters, ASCII digits, the ASCII space character (U+0020),
333// and the ASCII punctuation characters
334// “!#$%&()+,-.=@[]^_{}~”.
335// (The excluded punctuation characters, " * < > ? ` ' | / \ and :,
336// have special meanings in certain shells or operating systems.)
337//
338// CheckFilePath may be less restrictive in the future, but see the
339// top-level package documentation for additional information about
340// subtleties of Unicode.
341func CheckFilePath(path string) error {
342 if err := checkPath(path, filePath); err != nil {
343 return &InvalidPathError{Kind: "file", Path: path, Err: err}
344 }
345 return nil
346}
347
// badWindowsNames lists the path element names that are reserved on
// Windows: the console, printer, auxiliary and null devices followed by
// the numbered serial (COM) and parallel (LPT) ports.
// See https://docs.microsoft.com/en-us/windows/desktop/fileio/naming-a-file
var badWindowsNames = []string{
	"CON", "PRN", "AUX", "NUL",
	"COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9",
	"LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
}
374
// SplitPathVersion returns a prefix and version suffix such that
// prefix+"@"+version == path.
//
// SplitPathVersion returns (path, "", false) when there is no `@`
// character splitting the path or if the version is empty.
//
// It does not check that the version is valid in any way other than
// checking that it is not empty.
//
// For example:
//
//	SplitPathVersion("foo.com/bar@v0.1") returns ("foo.com/bar", "v0.1", true).
//	SplitPathVersion("foo.com/bar@badvers") returns ("foo.com/bar", "badvers", true).
//	SplitPathVersion("foo.com/bar") returns ("foo.com/bar", "", false).
//	SplitPathVersion("foo.com/bar@") returns ("foo.com/bar@", "", false).
//
// This function simply forwards to [ast.SplitPackageVersion].
//
// Deprecated: use [ast.SplitPackageVersion] instead.
//
//go:fix inline
func SplitPathVersion(path string) (prefix, version string, ok bool) {
	return ast.SplitPackageVersion(path)
}
397
// ImportPath holds the various components of an import path.
// It is a type alias for [ast.ImportPath].
//
// Deprecated: use [ast.ImportPath] instead.
//
//go:fix inline
type ImportPath = ast.ImportPath
404
// ParseImportPath returns the various components of an import path.
// It does not check the result for validity.
//
// This function simply forwards to [ast.ParseImportPath].
//
// Deprecated: use [ast.ParseImportPath] instead.
//
//go:fix inline
func ParseImportPath(p string) ast.ImportPath {
	return ast.ParseImportPath(p)
}
414
415// CheckPathMajor returns a non-nil error if the semantic version v
416// does not match the path major version pathMajor.
417func CheckPathMajor(v, pathMajor string) error {
418 if m := semver.Major(v); m != pathMajor {
419 return &InvalidVersionError{
420 Version: v,
421 Err: fmt.Errorf("should be %s, not %s", pathMajor, m),
422 }
423 }
424 return nil
425}