[mirror] Scalable static site server for Git forges (like GitHub Pages)
1package git_pages
2
3import (
4 "crypto/sha256"
5 "encoding/base64"
6 "encoding/json"
7 "errors"
8 "fmt"
9 "net"
10 "net/http"
11 "net/url"
12 "slices"
13 "strings"
14 "time"
15
16 "golang.org/x/net/idna"
17)
18
// AuthError is an error value that pairs a human-readable message with
// an HTTP status code (the code is populated from the `http.Status*`
// constants throughout this file).
type AuthError struct {
	// code is the HTTP status code associated with the failure.
	code int
	// error is the message returned by the Error method.
	error string
}

// Error implements the built-in `error` interface by returning the stored
// message verbatim.
func (e AuthError) Error() string {
	return e.error
}
27
28func IsUnauthorized(err error) bool {
29 var authErr AuthError
30 if errors.As(err, &authErr) {
31 return authErr.code == http.StatusUnauthorized
32 }
33 return false
34}
35
36func authorizeInsecure(r *http.Request) *Authorization {
37 if config.Insecure { // for testing only
38 logc.Println(r.Context(), "auth: INSECURE mode")
39 return &Authorization{
40 repoURLs: nil,
41 branch: "pages",
42 }
43 }
44 return nil
45}
46
47var idnaProfile = idna.New(idna.MapForLookup(), idna.BidiRule())
48
49func GetHost(r *http.Request) (string, error) {
50 host, _, err := net.SplitHostPort(r.Host)
51 if err != nil {
52 host = r.Host
53 }
54 // this also rejects invalid characters and labels
55 host, err = idnaProfile.ToASCII(host)
56 if err != nil {
57 if config.Feature("relaxed-idna") {
58 // unfortunately, the go IDNA library has some significant issues around its
59 // Unicode TR46 implementation: https://github.com/golang/go/issues/76804
60 // we would like to allow *just* the _ here, but adding `idna.StrictDomainName(false)`
61 // would also accept domains like `*.foo.bar` which should clearly be disallowed.
62 // as a workaround, accept a domain name if it is valid with all `_` characters
63 // replaced with an alphanumeric character (we use `a`); this allows e.g. `foo_bar.xxx`
64 // and `foo__bar.xxx`, as well as `_foo.xxx` and `foo_.xxx`. labels starting with
65 // an underscore are explicitly rejected below.
66 _, err = idnaProfile.ToASCII(strings.ReplaceAll(host, "_", "a"))
67 }
68 if err != nil {
69 return "", AuthError{http.StatusBadRequest,
70 fmt.Sprintf("malformed host name %q", host)}
71 }
72 }
73 if strings.HasPrefix(host, ".") || strings.HasPrefix(host, "_") {
74 return "", AuthError{http.StatusBadRequest,
75 fmt.Sprintf("reserved host name %q", host)}
76 }
77 host = strings.TrimSuffix(host, ".")
78 return host, nil
79}
80
// IsValidProjectName reports whether `name` may be used as a project name:
// it must not start with `.` and must not contain `%`.
func IsValidProjectName(name string) bool {
	if strings.HasPrefix(name, ".") {
		return false
	}
	return !strings.Contains(name, "%")
}
84
85func GetProjectName(r *http.Request) (string, error) {
86 // path must be either `/` or `/foo/` (`/foo` is accepted as an alias)
87 path := strings.TrimPrefix(strings.TrimSuffix(r.URL.Path, "/"), "/")
88 if !IsValidProjectName(path) {
89 return "", AuthError{http.StatusBadRequest,
90 fmt.Sprintf("directory name %q is reserved", ".index")}
91 } else if strings.Contains(path, "/") {
92 return "", AuthError{http.StatusBadRequest,
93 "directories nested too deep"}
94 }
95
96 if path == "" {
97 // path `/` corresponds to pseudo-project `.index`
98 return ".index", nil
99 } else {
100 return path, nil
101 }
102}
103
// Authorization describes what an authorized request is permitted to
// deploy from: a set of repository URLs and a branch.
type Authorization struct {
	// repoURLs is the set of allowed repository URLs. If `nil`, any URL is
	// allowed. If not `nil`, only the URLs in the set are allowed (so an empty
	// non-nil set allows none).
	repoURLs []string
	// branch is the only branch allowed. AuthorizeBranch enforces it only
	// when repoURLs is not `nil`.
	branch string
}
110
111func authorizeDNSChallenge(r *http.Request) (*Authorization, error) {
112 host, err := GetHost(r)
113 if err != nil {
114 return nil, err
115 }
116
117 authorization := r.Header.Get("Authorization")
118 if authorization == "" {
119 return nil, AuthError{http.StatusUnauthorized,
120 "missing Authorization header"}
121 }
122
123 scheme, param, success := strings.Cut(authorization, " ")
124 if !success {
125 return nil, AuthError{http.StatusBadRequest,
126 "malformed Authorization header"}
127 }
128
129 if scheme != "Pages" && scheme != "Basic" {
130 return nil, AuthError{http.StatusBadRequest,
131 "unknown Authorization scheme"}
132 }
133
134 // services like GitHub and Gogs cannot send a custom Authorization: header, but supplying
135 // username and password in the URL is basically just as good
136 if scheme == "Basic" {
137 basicParam, err := base64.StdEncoding.DecodeString(param)
138 if err != nil {
139 return nil, AuthError{http.StatusBadRequest,
140 "malformed Authorization: Basic header"}
141 }
142
143 username, password, found := strings.Cut(string(basicParam), ":")
144 if !found {
145 return nil, AuthError{http.StatusBadRequest,
146 "malformed Authorization: Basic parameter"}
147 }
148
149 if username != "Pages" {
150 return nil, AuthError{http.StatusUnauthorized,
151 "unexpected Authorization: Basic username"}
152 }
153
154 param = password
155 }
156
157 challengeHostname := fmt.Sprintf("_git-pages-challenge.%s", host)
158 actualChallenges, err := net.LookupTXT(challengeHostname)
159 if err != nil {
160 return nil, AuthError{http.StatusUnauthorized,
161 fmt.Sprintf("failed to look up DNS challenge: %s TXT", challengeHostname)}
162 }
163
164 expectedChallenge := fmt.Sprintf("%x", sha256.Sum256(fmt.Appendf(nil, "%s %s", host, param)))
165 if !slices.Contains(actualChallenges, expectedChallenge) {
166 return nil, AuthError{http.StatusUnauthorized, fmt.Sprintf(
167 "defeated by DNS challenge: %s TXT %v does not include %s",
168 challengeHostname,
169 actualChallenges,
170 expectedChallenge,
171 )}
172 }
173
174 return &Authorization{
175 repoURLs: nil, // any
176 branch: "pages",
177 }, nil
178}
179
180func authorizeDNSAllowlist(r *http.Request) (*Authorization, error) {
181 host, err := GetHost(r)
182 if err != nil {
183 return nil, err
184 }
185
186 projectName, err := GetProjectName(r)
187 if err != nil {
188 return nil, err
189 }
190
191 allowlistHostname := fmt.Sprintf("_git-pages-repository.%s", host)
192 records, err := net.LookupTXT(allowlistHostname)
193 if err != nil {
194 return nil, AuthError{http.StatusUnauthorized,
195 fmt.Sprintf("failed to look up DNS repository allowlist: %s TXT", allowlistHostname)}
196 }
197
198 if projectName != ".index" {
199 return nil, AuthError{http.StatusUnauthorized,
200 "DNS repository allowlist only authorizes index site"}
201 }
202
203 var (
204 repoURLs []string
205 errs []error
206 )
207 for _, record := range records {
208 if parsedURL, err := url.Parse(record); err != nil {
209 errs = append(errs, fmt.Errorf("failed to parse URL: %s TXT %q", allowlistHostname, record))
210 } else if !parsedURL.IsAbs() {
211 errs = append(errs, fmt.Errorf("repository URL is not absolute: %s TXT %q", allowlistHostname, record))
212 } else {
213 repoURLs = append(repoURLs, record)
214 }
215 }
216
217 if len(repoURLs) == 0 {
218 if len(records) > 0 {
219 errs = append([]error{AuthError{http.StatusUnauthorized,
220 fmt.Sprintf("no valid DNS TXT records for %s", allowlistHostname)}},
221 errs...)
222 return nil, joinErrors(errs...)
223 } else {
224 return nil, AuthError{http.StatusUnauthorized,
225 fmt.Sprintf("no DNS TXT records found for %s", allowlistHostname)}
226 }
227 }
228
229 return &Authorization{
230 repoURLs: repoURLs,
231 branch: "pages",
232 }, err
233}
234
235// used for `/.git-pages/...` metadata
236func authorizeWildcardMatchHost(r *http.Request, pattern *WildcardPattern) (*Authorization, error) {
237 host, err := GetHost(r)
238 if err != nil {
239 return nil, err
240 }
241
242 if _, found := pattern.Matches(host); found {
243 return &Authorization{
244 repoURLs: []string{},
245 branch: "",
246 }, nil
247 } else {
248 return nil, AuthError{
249 http.StatusUnauthorized,
250 fmt.Sprintf("domain %s does not match wildcard %s", host, pattern.GetHost()),
251 }
252 }
253}
254
255// used for updates to site content
256func authorizeWildcardMatchSite(r *http.Request, pattern *WildcardPattern) (*Authorization, error) {
257 host, err := GetHost(r)
258 if err != nil {
259 return nil, err
260 }
261
262 projectName, err := GetProjectName(r)
263 if err != nil {
264 return nil, err
265 }
266
267 if userName, found := pattern.Matches(host); found {
268 repoURLs, branch := pattern.ApplyTemplate(userName, projectName)
269 return &Authorization{repoURLs, branch}, nil
270 } else {
271 return nil, AuthError{
272 http.StatusUnauthorized,
273 fmt.Sprintf("domain %s does not match wildcard %s", host, pattern.GetHost()),
274 }
275 }
276}
277
278// used for compatibility with Codeberg Pages v2
279// see https://docs.codeberg.org/codeberg-pages/using-custom-domain/
280func authorizeCodebergPagesV2(r *http.Request) (*Authorization, error) {
281 host, err := GetHost(r)
282 if err != nil {
283 return nil, err
284 }
285
286 dnsRecords := []string{}
287
288 cnameRecord, err := net.LookupCNAME(host)
289 // "LookupCNAME does not return an error if host does not contain DNS "CNAME" records,
290 // as long as host resolves to address records.
291 if err == nil && cnameRecord != host {
292 // LookupCNAME() returns a domain with the root label, i.e. `username.codeberg.page.`,
293 // with the trailing dot
294 dnsRecords = append(dnsRecords, strings.TrimSuffix(cnameRecord, "."))
295 }
296
297 txtRecords, err := net.LookupTXT(host)
298 if err == nil {
299 dnsRecords = append(dnsRecords, txtRecords...)
300 }
301
302 if len(dnsRecords) > 0 {
303 logc.Printf(r.Context(), "auth: %s TXT/CNAME: %q\n", host, dnsRecords)
304 }
305
306 for _, dnsRecord := range dnsRecords {
307 domainParts := strings.Split(dnsRecord, ".")
308 slices.Reverse(domainParts)
309 if domainParts[0] == "" {
310 domainParts = domainParts[1:]
311 }
312 if len(domainParts) >= 3 && len(domainParts) <= 5 {
313 if domainParts[0] == "page" && domainParts[1] == "codeberg" {
314 // map of domain names to allowed repository and branch:
315 // * {username}.codeberg.page =>
316 // https://codeberg.org/{username}/pages.git#main
317 // * {reponame}.{username}.codeberg.page =>
318 // https://codeberg.org/{username}/{reponame}.git#pages
319 // * {branch}.{reponame}.{username}.codeberg.page =>
320 // https://codeberg.org/{username}/{reponame}.git#{branch}
321 username := domainParts[2]
322 reponame := "pages"
323 branch := "main"
324 if len(domainParts) >= 4 {
325 reponame = domainParts[3]
326 branch = "pages"
327 }
328 if len(domainParts) == 5 {
329 branch = domainParts[4]
330 }
331 return &Authorization{
332 repoURLs: []string{
333 fmt.Sprintf("https://codeberg.org/%s/%s.git", username, reponame),
334 },
335 branch: branch,
336 }, nil
337 }
338 }
339 }
340
341 return nil, AuthError{
342 http.StatusUnauthorized,
343 fmt.Sprintf("domain %s does not have Codeberg Pages TXT or CNAME records", host),
344 }
345}
346
347// Checks whether an operation that enables enumerating site contents is allowed.
348func AuthorizeMetadataRetrieval(r *http.Request) (*Authorization, error) {
349 causes := []error{AuthError{http.StatusUnauthorized, "unauthorized"}}
350
351 auth := authorizeInsecure(r)
352 if auth != nil {
353 return auth, nil
354 }
355
356 auth, err := authorizeDNSChallenge(r)
357 if err != nil && IsUnauthorized(err) {
358 causes = append(causes, err)
359 } else if err != nil { // bad request
360 return nil, err
361 } else {
362 logc.Println(r.Context(), "auth: DNS challenge")
363 return auth, nil
364 }
365
366 for _, pattern := range wildcards {
367 auth, err = authorizeWildcardMatchHost(r, pattern)
368 if err != nil && IsUnauthorized(err) {
369 causes = append(causes, err)
370 } else if err != nil { // bad request
371 return nil, err
372 } else {
373 logc.Printf(r.Context(), "auth: wildcard %s\n", pattern.GetHost())
374 return auth, nil
375 }
376 }
377
378 if config.Feature("codeberg-pages-compat") {
379 auth, err = authorizeCodebergPagesV2(r)
380 if err != nil && IsUnauthorized(err) {
381 causes = append(causes, err)
382 } else if err != nil { // bad request
383 return nil, err
384 } else {
385 logc.Printf(r.Context(), "auth: codeberg %s\n", r.Host)
386 return auth, nil
387 }
388 }
389
390 return nil, joinErrors(causes...)
391}
392
393// Returns `repoURLs, err` where if `err == nil` then the request is authorized to clone from
394// any repository URL included in `repoURLs` (by case-insensitive comparison), or any URL at all
395// if `repoURLs == nil`.
396func AuthorizeUpdateFromRepository(r *http.Request) (*Authorization, error) {
397 causes := []error{AuthError{http.StatusUnauthorized, "unauthorized"}}
398
399 if err := CheckForbiddenDomain(r); err != nil {
400 return nil, err
401 }
402
403 auth := authorizeInsecure(r)
404 if auth != nil {
405 return auth, nil
406 }
407
408 // DNS challenge gives absolute authority.
409 auth, err := authorizeDNSChallenge(r)
410 if err != nil && IsUnauthorized(err) {
411 causes = append(causes, err)
412 } else if err != nil { // bad request
413 return nil, err
414 } else {
415 logc.Println(r.Context(), "auth: DNS challenge: allow *")
416 return auth, nil
417 }
418
419 // DNS allowlist gives authority to update but not delete.
420 if r.Method == http.MethodPut || r.Method == http.MethodPost {
421 auth, err = authorizeDNSAllowlist(r)
422 if err != nil && IsUnauthorized(err) {
423 causes = append(causes, err)
424 } else if err != nil { // bad request
425 return nil, err
426 } else {
427 logc.Printf(r.Context(), "auth: DNS allowlist: allow %v\n", auth.repoURLs)
428 return auth, nil
429 }
430 }
431
432 // Wildcard match is only available for webhooks, not the REST API.
433 if r.Method == http.MethodPost {
434 for _, pattern := range wildcards {
435 auth, err = authorizeWildcardMatchSite(r, pattern)
436 if err != nil && IsUnauthorized(err) {
437 causes = append(causes, err)
438 } else if err != nil { // bad request
439 return nil, err
440 } else {
441 logc.Printf(r.Context(), "auth: wildcard %s: allow %v\n", pattern.GetHost(), auth.repoURLs)
442 return auth, nil
443 }
444 }
445
446 if config.Feature("codeberg-pages-compat") {
447 auth, err = authorizeCodebergPagesV2(r)
448 if err != nil && IsUnauthorized(err) {
449 causes = append(causes, err)
450 } else if err != nil { // bad request
451 return nil, err
452 } else {
453 logc.Printf(r.Context(), "auth: codeberg %s: allow %v branch %s\n",
454 r.Host, auth.repoURLs, auth.branch)
455 return auth, nil
456 }
457 }
458 }
459
460 return nil, joinErrors(causes...)
461}
462
463func checkAllowedURLPrefix(repoURL string) error {
464 if len(config.Limits.AllowedRepositoryURLPrefixes) > 0 {
465 allowedPrefix := false
466 repoURL = strings.ToLower(repoURL)
467 for _, allowedRepoURLPrefix := range config.Limits.AllowedRepositoryURLPrefixes {
468 if strings.HasPrefix(repoURL, strings.ToLower(allowedRepoURLPrefix)) {
469 allowedPrefix = true
470 break
471 }
472 }
473 if !allowedPrefix {
474 return AuthError{
475 http.StatusUnauthorized,
476 fmt.Sprintf("clone URL not in prefix allowlist %v",
477 config.Limits.AllowedRepositoryURLPrefixes),
478 }
479 }
480 }
481
482 return nil
483}
484
// repoURLSchemeAllowlist lists the only URL schemes that a repository may
// be cloned from.
var repoURLSchemeAllowlist = []string{"ssh", "http", "https"}
486
487func AuthorizeRepository(repoURL string, auth *Authorization) error {
488 // Regardless of any other authorization, only the allowlisted URL schemes
489 // may ever be cloned from, so this check has to come first.
490 parsedRepoURL, err := url.Parse(repoURL)
491 if err != nil {
492 if strings.HasPrefix(repoURL, "git@") {
493 return AuthError{http.StatusBadRequest, "malformed clone URL; use ssh:// scheme"}
494 } else {
495 return AuthError{http.StatusBadRequest, "malformed clone URL"}
496 }
497 }
498 if !slices.Contains(repoURLSchemeAllowlist, parsedRepoURL.Scheme) {
499 return AuthError{
500 http.StatusUnauthorized,
501 fmt.Sprintf("clone URL scheme not in allowlist %v",
502 repoURLSchemeAllowlist),
503 }
504 }
505
506 if auth.repoURLs == nil {
507 return nil // any
508 }
509
510 if err = checkAllowedURLPrefix(repoURL); err != nil {
511 return err
512 }
513
514 allowed := false
515 repoURL = strings.ToLower(repoURL)
516 for _, allowedRepoURL := range auth.repoURLs {
517 if repoURL == strings.ToLower(allowedRepoURL) {
518 allowed = true
519 break
520 }
521 }
522 if !allowed {
523 return AuthError{
524 http.StatusUnauthorized,
525 fmt.Sprintf("clone URL not in allowlist %v", auth.repoURLs),
526 }
527 }
528
529 return nil
530}
531
532// The purpose of `allowRepoURLs` is to make sure that only authorized content is deployed
533// to the site despite the fact that the non-shared-secret authorization methods allow anyone
534// to impersonate the legitimate webhook sender. (If switching to another repository URL would
535// be catastrophic, then so would be switching to a different branch.)
536func AuthorizeBranch(branch string, auth *Authorization) error {
537 if auth.repoURLs == nil {
538 return nil // any
539 }
540
541 if branch == auth.branch {
542 return nil
543 } else {
544 return AuthError{
545 http.StatusUnauthorized,
546 fmt.Sprintf("branch %s not in allowlist %v", branch, []string{auth.branch}),
547 }
548 }
549}
550
551// Gogs, Gitea, and Forgejo all support the same API here.
552func checkGogsRepositoryPushPermission(baseURL *url.URL, authorization string) error {
553 ownerAndRepo := strings.TrimSuffix(strings.TrimPrefix(baseURL.Path, "/"), ".git")
554 request, err := http.NewRequest("GET", baseURL.ResolveReference(&url.URL{
555 Path: fmt.Sprintf("/api/v1/repos/%s", ownerAndRepo),
556 }).String(), nil)
557 if err != nil {
558 panic(err) // misconfiguration
559 }
560 request.Header.Set("Accept", "application/json")
561 request.Header.Set("Authorization", authorization)
562
563 httpClient := http.Client{Timeout: 5 * time.Second}
564 response, err := httpClient.Do(request)
565 if err != nil {
566 return AuthError{
567 http.StatusServiceUnavailable,
568 fmt.Sprintf("cannot check repository permissions: %s", err),
569 }
570 }
571 defer response.Body.Close()
572
573 if response.StatusCode == http.StatusNotFound {
574 return AuthError{
575 http.StatusNotFound,
576 fmt.Sprintf("no repository %s", ownerAndRepo),
577 }
578 } else if response.StatusCode != http.StatusOK {
579 return AuthError{
580 http.StatusServiceUnavailable,
581 fmt.Sprintf(
582 "cannot check repository permissions: GET %s returned %s",
583 request.URL,
584 response.Status,
585 ),
586 }
587 }
588 decoder := json.NewDecoder(response.Body)
589
590 var repositoryInfo struct{ Permissions struct{ Push bool } }
591 if err = decoder.Decode(&repositoryInfo); err != nil {
592 return errors.Join(AuthError{
593 http.StatusServiceUnavailable,
594 fmt.Sprintf(
595 "cannot check repository permissions: GET %s returned malformed JSON",
596 request.URL,
597 ),
598 }, err)
599 }
600
601 if !repositoryInfo.Permissions.Push {
602 return AuthError{
603 http.StatusUnauthorized,
604 fmt.Sprintf("no push permission for %s", ownerAndRepo),
605 }
606 }
607
608 // this token authorizes pushing to the repo, yay!
609 return nil
610}
611
612func authorizeForgeWithToken(r *http.Request) (*Authorization, error) {
613 authorization := r.Header.Get("Forge-Authorization")
614 if authorization == "" {
615 return nil, AuthError{http.StatusUnauthorized, "missing Forge-Authorization header"}
616 }
617
618 host, err := GetHost(r)
619 if err != nil {
620 return nil, err
621 }
622
623 projectName, err := GetProjectName(r)
624 if err != nil {
625 return nil, err
626 }
627
628 var errs []error
629 for _, pattern := range wildcards {
630 if !pattern.Authorization {
631 continue
632 }
633
634 if userName, found := pattern.Matches(host); found {
635 repoURLs, branch := pattern.ApplyTemplate(userName, projectName)
636 for _, repoURL := range repoURLs {
637 parsedRepoURL, err := url.Parse(repoURL)
638 if err != nil {
639 panic(err) // misconfiguration
640 }
641
642 if err = checkGogsRepositoryPushPermission(parsedRepoURL, authorization); err != nil {
643 errs = append(errs, err)
644 continue
645 }
646
647 // This will actually be ignored by the caller of AuthorizeUpdateFromArchive,
648 // but we return this information as it makes sense to do contextually here.
649 return &Authorization{
650 []string{repoURL},
651 branch,
652 }, nil
653 }
654 }
655 }
656
657 errs = append([]error{
658 AuthError{http.StatusUnauthorized, "not authorized by forge"},
659 }, errs...)
660 return nil, joinErrors(errs...)
661}
662
663func AuthorizeUpdateFromArchive(r *http.Request) (*Authorization, error) {
664 causes := []error{AuthError{http.StatusUnauthorized, "unauthorized"}}
665
666 if err := CheckForbiddenDomain(r); err != nil {
667 return nil, err
668 }
669
670 auth := authorizeInsecure(r)
671 if auth != nil {
672 return auth, nil
673 }
674
675 // Token authorization allows updating a site on a wildcard domain from an archive.
676 auth, err := authorizeForgeWithToken(r)
677 if err != nil && IsUnauthorized(err) {
678 causes = append(causes, err)
679 } else if err != nil { // bad request
680 return nil, err
681 } else {
682 logc.Printf(r.Context(), "auth: forge token: allow\n")
683 return auth, nil
684 }
685
686 if len(config.Limits.AllowedRepositoryURLPrefixes) > 0 {
687 causes = append(causes, AuthError{http.StatusUnauthorized, "DNS challenge not allowed"})
688 } else {
689 // DNS challenge gives absolute authority.
690 auth, err = authorizeDNSChallenge(r)
691 if err != nil && IsUnauthorized(err) {
692 causes = append(causes, err)
693 } else if err != nil { // bad request
694 return nil, err
695 } else {
696 logc.Println(r.Context(), "auth: DNS challenge")
697 return auth, nil
698 }
699 }
700
701 return nil, joinErrors(causes...)
702}
703
704func CheckForbiddenDomain(r *http.Request) error {
705 host, err := GetHost(r)
706 if err != nil {
707 return err
708 }
709
710 host = strings.ToLower(host)
711 for _, reservedDomain := range config.Limits.ForbiddenDomains {
712 reservedDomain = strings.ToLower(reservedDomain)
713 if host == reservedDomain || strings.HasSuffix(host, fmt.Sprintf(".%s", reservedDomain)) {
714 return AuthError{http.StatusForbidden, "forbidden domain"}
715 }
716 }
717
718 return nil
719}