// [mirror] Scalable static site server for Git forges (like GitHub Pages)
// (repository web view residue: at main, 20 kB, view raw)
1package git_pages 2 3import ( 4 "crypto/sha256" 5 "encoding/base64" 6 "encoding/json" 7 "errors" 8 "fmt" 9 "net" 10 "net/http" 11 "net/url" 12 "slices" 13 "strings" 14 "time" 15 16 "golang.org/x/net/idna" 17) 18 19type AuthError struct { 20 code int 21 error string 22} 23 24func (e AuthError) Error() string { 25 return e.error 26} 27 28func IsUnauthorized(err error) bool { 29 var authErr AuthError 30 if errors.As(err, &authErr) { 31 return authErr.code == http.StatusUnauthorized 32 } 33 return false 34} 35 36func authorizeInsecure(r *http.Request) *Authorization { 37 if config.Insecure { // for testing only 38 logc.Println(r.Context(), "auth: INSECURE mode") 39 return &Authorization{ 40 repoURLs: nil, 41 branch: "pages", 42 } 43 } 44 return nil 45} 46 47var idnaProfile = idna.New(idna.MapForLookup(), idna.BidiRule()) 48 49func GetHost(r *http.Request) (string, error) { 50 host, _, err := net.SplitHostPort(r.Host) 51 if err != nil { 52 host = r.Host 53 } 54 // this also rejects invalid characters and labels 55 host, err = idnaProfile.ToASCII(host) 56 if err != nil { 57 if config.Feature("relaxed-idna") { 58 // unfortunately, the go IDNA library has some significant issues around its 59 // Unicode TR46 implementation: https://github.com/golang/go/issues/76804 60 // we would like to allow *just* the _ here, but adding `idna.StrictDomainName(false)` 61 // would also accept domains like `*.foo.bar` which should clearly be disallowed. 62 // as a workaround, accept a domain name if it is valid with all `_` characters 63 // replaced with an alphanumeric character (we use `a`); this allows e.g. `foo_bar.xxx` 64 // and `foo__bar.xxx`, as well as `_foo.xxx` and `foo_.xxx`. labels starting with 65 // an underscore are explicitly rejected below. 
66 _, err = idnaProfile.ToASCII(strings.ReplaceAll(host, "_", "a")) 67 } 68 if err != nil { 69 return "", AuthError{http.StatusBadRequest, 70 fmt.Sprintf("malformed host name %q", host)} 71 } 72 } 73 if strings.HasPrefix(host, ".") || strings.HasPrefix(host, "_") { 74 return "", AuthError{http.StatusBadRequest, 75 fmt.Sprintf("reserved host name %q", host)} 76 } 77 host = strings.TrimSuffix(host, ".") 78 return host, nil 79} 80 81func IsValidProjectName(name string) bool { 82 return !strings.HasPrefix(name, ".") && !strings.Contains(name, "%") 83} 84 85func GetProjectName(r *http.Request) (string, error) { 86 // path must be either `/` or `/foo/` (`/foo` is accepted as an alias) 87 path := strings.TrimPrefix(strings.TrimSuffix(r.URL.Path, "/"), "/") 88 if !IsValidProjectName(path) { 89 return "", AuthError{http.StatusBadRequest, 90 fmt.Sprintf("directory name %q is reserved", ".index")} 91 } else if strings.Contains(path, "/") { 92 return "", AuthError{http.StatusBadRequest, 93 "directories nested too deep"} 94 } 95 96 if path == "" { 97 // path `/` corresponds to pseudo-project `.index` 98 return ".index", nil 99 } else { 100 return path, nil 101 } 102} 103 104type Authorization struct { 105 // If `nil`, any URL is allowed. If not, only those in the set are allowed. 106 repoURLs []string 107 // Only the exact branch is allowed. 
108 branch string 109} 110 111func authorizeDNSChallenge(r *http.Request) (*Authorization, error) { 112 host, err := GetHost(r) 113 if err != nil { 114 return nil, err 115 } 116 117 authorization := r.Header.Get("Authorization") 118 if authorization == "" { 119 return nil, AuthError{http.StatusUnauthorized, 120 "missing Authorization header"} 121 } 122 123 scheme, param, success := strings.Cut(authorization, " ") 124 if !success { 125 return nil, AuthError{http.StatusBadRequest, 126 "malformed Authorization header"} 127 } 128 129 if scheme != "Pages" && scheme != "Basic" { 130 return nil, AuthError{http.StatusBadRequest, 131 "unknown Authorization scheme"} 132 } 133 134 // services like GitHub and Gogs cannot send a custom Authorization: header, but supplying 135 // username and password in the URL is basically just as good 136 if scheme == "Basic" { 137 basicParam, err := base64.StdEncoding.DecodeString(param) 138 if err != nil { 139 return nil, AuthError{http.StatusBadRequest, 140 "malformed Authorization: Basic header"} 141 } 142 143 username, password, found := strings.Cut(string(basicParam), ":") 144 if !found { 145 return nil, AuthError{http.StatusBadRequest, 146 "malformed Authorization: Basic parameter"} 147 } 148 149 if username != "Pages" { 150 return nil, AuthError{http.StatusUnauthorized, 151 "unexpected Authorization: Basic username"} 152 } 153 154 param = password 155 } 156 157 challengeHostname := fmt.Sprintf("_git-pages-challenge.%s", host) 158 actualChallenges, err := net.LookupTXT(challengeHostname) 159 if err != nil { 160 return nil, AuthError{http.StatusUnauthorized, 161 fmt.Sprintf("failed to look up DNS challenge: %s TXT", challengeHostname)} 162 } 163 164 expectedChallenge := fmt.Sprintf("%x", sha256.Sum256(fmt.Appendf(nil, "%s %s", host, param))) 165 if !slices.Contains(actualChallenges, expectedChallenge) { 166 return nil, AuthError{http.StatusUnauthorized, fmt.Sprintf( 167 "defeated by DNS challenge: %s TXT %v does not include %s", 168 
challengeHostname, 169 actualChallenges, 170 expectedChallenge, 171 )} 172 } 173 174 return &Authorization{ 175 repoURLs: nil, // any 176 branch: "pages", 177 }, nil 178} 179 180func authorizeDNSAllowlist(r *http.Request) (*Authorization, error) { 181 host, err := GetHost(r) 182 if err != nil { 183 return nil, err 184 } 185 186 projectName, err := GetProjectName(r) 187 if err != nil { 188 return nil, err 189 } 190 191 allowlistHostname := fmt.Sprintf("_git-pages-repository.%s", host) 192 records, err := net.LookupTXT(allowlistHostname) 193 if err != nil { 194 return nil, AuthError{http.StatusUnauthorized, 195 fmt.Sprintf("failed to look up DNS repository allowlist: %s TXT", allowlistHostname)} 196 } 197 198 if projectName != ".index" { 199 return nil, AuthError{http.StatusUnauthorized, 200 "DNS repository allowlist only authorizes index site"} 201 } 202 203 var ( 204 repoURLs []string 205 errs []error 206 ) 207 for _, record := range records { 208 if parsedURL, err := url.Parse(record); err != nil { 209 errs = append(errs, fmt.Errorf("failed to parse URL: %s TXT %q", allowlistHostname, record)) 210 } else if !parsedURL.IsAbs() { 211 errs = append(errs, fmt.Errorf("repository URL is not absolute: %s TXT %q", allowlistHostname, record)) 212 } else { 213 repoURLs = append(repoURLs, record) 214 } 215 } 216 217 if len(repoURLs) == 0 { 218 if len(records) > 0 { 219 errs = append([]error{AuthError{http.StatusUnauthorized, 220 fmt.Sprintf("no valid DNS TXT records for %s", allowlistHostname)}}, 221 errs...) 222 return nil, joinErrors(errs...) 
223 } else { 224 return nil, AuthError{http.StatusUnauthorized, 225 fmt.Sprintf("no DNS TXT records found for %s", allowlistHostname)} 226 } 227 } 228 229 return &Authorization{ 230 repoURLs: repoURLs, 231 branch: "pages", 232 }, err 233} 234 235// used for `/.git-pages/...` metadata 236func authorizeWildcardMatchHost(r *http.Request, pattern *WildcardPattern) (*Authorization, error) { 237 host, err := GetHost(r) 238 if err != nil { 239 return nil, err 240 } 241 242 if _, found := pattern.Matches(host); found { 243 return &Authorization{ 244 repoURLs: []string{}, 245 branch: "", 246 }, nil 247 } else { 248 return nil, AuthError{ 249 http.StatusUnauthorized, 250 fmt.Sprintf("domain %s does not match wildcard %s", host, pattern.GetHost()), 251 } 252 } 253} 254 255// used for updates to site content 256func authorizeWildcardMatchSite(r *http.Request, pattern *WildcardPattern) (*Authorization, error) { 257 host, err := GetHost(r) 258 if err != nil { 259 return nil, err 260 } 261 262 projectName, err := GetProjectName(r) 263 if err != nil { 264 return nil, err 265 } 266 267 if userName, found := pattern.Matches(host); found { 268 repoURLs, branch := pattern.ApplyTemplate(userName, projectName) 269 return &Authorization{repoURLs, branch}, nil 270 } else { 271 return nil, AuthError{ 272 http.StatusUnauthorized, 273 fmt.Sprintf("domain %s does not match wildcard %s", host, pattern.GetHost()), 274 } 275 } 276} 277 278// used for compatibility with Codeberg Pages v2 279// see https://docs.codeberg.org/codeberg-pages/using-custom-domain/ 280func authorizeCodebergPagesV2(r *http.Request) (*Authorization, error) { 281 host, err := GetHost(r) 282 if err != nil { 283 return nil, err 284 } 285 286 dnsRecords := []string{} 287 288 cnameRecord, err := net.LookupCNAME(host) 289 // "LookupCNAME does not return an error if host does not contain DNS "CNAME" records, 290 // as long as host resolves to address records. 
291 if err == nil && cnameRecord != host { 292 // LookupCNAME() returns a domain with the root label, i.e. `username.codeberg.page.`, 293 // with the trailing dot 294 dnsRecords = append(dnsRecords, strings.TrimSuffix(cnameRecord, ".")) 295 } 296 297 txtRecords, err := net.LookupTXT(host) 298 if err == nil { 299 dnsRecords = append(dnsRecords, txtRecords...) 300 } 301 302 if len(dnsRecords) > 0 { 303 logc.Printf(r.Context(), "auth: %s TXT/CNAME: %q\n", host, dnsRecords) 304 } 305 306 for _, dnsRecord := range dnsRecords { 307 domainParts := strings.Split(dnsRecord, ".") 308 slices.Reverse(domainParts) 309 if domainParts[0] == "" { 310 domainParts = domainParts[1:] 311 } 312 if len(domainParts) >= 3 && len(domainParts) <= 5 { 313 if domainParts[0] == "page" && domainParts[1] == "codeberg" { 314 // map of domain names to allowed repository and branch: 315 // * {username}.codeberg.page => 316 // https://codeberg.org/{username}/pages.git#main 317 // * {reponame}.{username}.codeberg.page => 318 // https://codeberg.org/{username}/{reponame}.git#pages 319 // * {branch}.{reponame}.{username}.codeberg.page => 320 // https://codeberg.org/{username}/{reponame}.git#{branch} 321 username := domainParts[2] 322 reponame := "pages" 323 branch := "main" 324 if len(domainParts) >= 4 { 325 reponame = domainParts[3] 326 branch = "pages" 327 } 328 if len(domainParts) == 5 { 329 branch = domainParts[4] 330 } 331 return &Authorization{ 332 repoURLs: []string{ 333 fmt.Sprintf("https://codeberg.org/%s/%s.git", username, reponame), 334 }, 335 branch: branch, 336 }, nil 337 } 338 } 339 } 340 341 return nil, AuthError{ 342 http.StatusUnauthorized, 343 fmt.Sprintf("domain %s does not have Codeberg Pages TXT or CNAME records", host), 344 } 345} 346 347// Checks whether an operation that enables enumerating site contents is allowed. 
348func AuthorizeMetadataRetrieval(r *http.Request) (*Authorization, error) { 349 causes := []error{AuthError{http.StatusUnauthorized, "unauthorized"}} 350 351 auth := authorizeInsecure(r) 352 if auth != nil { 353 return auth, nil 354 } 355 356 auth, err := authorizeDNSChallenge(r) 357 if err != nil && IsUnauthorized(err) { 358 causes = append(causes, err) 359 } else if err != nil { // bad request 360 return nil, err 361 } else { 362 logc.Println(r.Context(), "auth: DNS challenge") 363 return auth, nil 364 } 365 366 for _, pattern := range wildcards { 367 auth, err = authorizeWildcardMatchHost(r, pattern) 368 if err != nil && IsUnauthorized(err) { 369 causes = append(causes, err) 370 } else if err != nil { // bad request 371 return nil, err 372 } else { 373 logc.Printf(r.Context(), "auth: wildcard %s\n", pattern.GetHost()) 374 return auth, nil 375 } 376 } 377 378 if config.Feature("codeberg-pages-compat") { 379 auth, err = authorizeCodebergPagesV2(r) 380 if err != nil && IsUnauthorized(err) { 381 causes = append(causes, err) 382 } else if err != nil { // bad request 383 return nil, err 384 } else { 385 logc.Printf(r.Context(), "auth: codeberg %s\n", r.Host) 386 return auth, nil 387 } 388 } 389 390 return nil, joinErrors(causes...) 391} 392 393// Returns `repoURLs, err` where if `err == nil` then the request is authorized to clone from 394// any repository URL included in `repoURLs` (by case-insensitive comparison), or any URL at all 395// if `repoURLs == nil`. 396func AuthorizeUpdateFromRepository(r *http.Request) (*Authorization, error) { 397 causes := []error{AuthError{http.StatusUnauthorized, "unauthorized"}} 398 399 if err := CheckForbiddenDomain(r); err != nil { 400 return nil, err 401 } 402 403 auth := authorizeInsecure(r) 404 if auth != nil { 405 return auth, nil 406 } 407 408 // DNS challenge gives absolute authority. 
409 auth, err := authorizeDNSChallenge(r) 410 if err != nil && IsUnauthorized(err) { 411 causes = append(causes, err) 412 } else if err != nil { // bad request 413 return nil, err 414 } else { 415 logc.Println(r.Context(), "auth: DNS challenge: allow *") 416 return auth, nil 417 } 418 419 // DNS allowlist gives authority to update but not delete. 420 if r.Method == http.MethodPut || r.Method == http.MethodPost { 421 auth, err = authorizeDNSAllowlist(r) 422 if err != nil && IsUnauthorized(err) { 423 causes = append(causes, err) 424 } else if err != nil { // bad request 425 return nil, err 426 } else { 427 logc.Printf(r.Context(), "auth: DNS allowlist: allow %v\n", auth.repoURLs) 428 return auth, nil 429 } 430 } 431 432 // Wildcard match is only available for webhooks, not the REST API. 433 if r.Method == http.MethodPost { 434 for _, pattern := range wildcards { 435 auth, err = authorizeWildcardMatchSite(r, pattern) 436 if err != nil && IsUnauthorized(err) { 437 causes = append(causes, err) 438 } else if err != nil { // bad request 439 return nil, err 440 } else { 441 logc.Printf(r.Context(), "auth: wildcard %s: allow %v\n", pattern.GetHost(), auth.repoURLs) 442 return auth, nil 443 } 444 } 445 446 if config.Feature("codeberg-pages-compat") { 447 auth, err = authorizeCodebergPagesV2(r) 448 if err != nil && IsUnauthorized(err) { 449 causes = append(causes, err) 450 } else if err != nil { // bad request 451 return nil, err 452 } else { 453 logc.Printf(r.Context(), "auth: codeberg %s: allow %v branch %s\n", 454 r.Host, auth.repoURLs, auth.branch) 455 return auth, nil 456 } 457 } 458 } 459 460 return nil, joinErrors(causes...) 
461} 462 463func checkAllowedURLPrefix(repoURL string) error { 464 if len(config.Limits.AllowedRepositoryURLPrefixes) > 0 { 465 allowedPrefix := false 466 repoURL = strings.ToLower(repoURL) 467 for _, allowedRepoURLPrefix := range config.Limits.AllowedRepositoryURLPrefixes { 468 if strings.HasPrefix(repoURL, strings.ToLower(allowedRepoURLPrefix)) { 469 allowedPrefix = true 470 break 471 } 472 } 473 if !allowedPrefix { 474 return AuthError{ 475 http.StatusUnauthorized, 476 fmt.Sprintf("clone URL not in prefix allowlist %v", 477 config.Limits.AllowedRepositoryURLPrefixes), 478 } 479 } 480 } 481 482 return nil 483} 484 485var repoURLSchemeAllowlist []string = []string{"ssh", "http", "https"} 486 487func AuthorizeRepository(repoURL string, auth *Authorization) error { 488 // Regardless of any other authorization, only the allowlisted URL schemes 489 // may ever be cloned from, so this check has to come first. 490 parsedRepoURL, err := url.Parse(repoURL) 491 if err != nil { 492 if strings.HasPrefix(repoURL, "git@") { 493 return AuthError{http.StatusBadRequest, "malformed clone URL; use ssh:// scheme"} 494 } else { 495 return AuthError{http.StatusBadRequest, "malformed clone URL"} 496 } 497 } 498 if !slices.Contains(repoURLSchemeAllowlist, parsedRepoURL.Scheme) { 499 return AuthError{ 500 http.StatusUnauthorized, 501 fmt.Sprintf("clone URL scheme not in allowlist %v", 502 repoURLSchemeAllowlist), 503 } 504 } 505 506 if auth.repoURLs == nil { 507 return nil // any 508 } 509 510 if err = checkAllowedURLPrefix(repoURL); err != nil { 511 return err 512 } 513 514 allowed := false 515 repoURL = strings.ToLower(repoURL) 516 for _, allowedRepoURL := range auth.repoURLs { 517 if repoURL == strings.ToLower(allowedRepoURL) { 518 allowed = true 519 break 520 } 521 } 522 if !allowed { 523 return AuthError{ 524 http.StatusUnauthorized, 525 fmt.Sprintf("clone URL not in allowlist %v", auth.repoURLs), 526 } 527 } 528 529 return nil 530} 531 532// The purpose of `allowRepoURLs` is to 
make sure that only authorized content is deployed 533// to the site despite the fact that the non-shared-secret authorization methods allow anyone 534// to impersonate the legitimate webhook sender. (If switching to another repository URL would 535// be catastrophic, then so would be switching to a different branch.) 536func AuthorizeBranch(branch string, auth *Authorization) error { 537 if auth.repoURLs == nil { 538 return nil // any 539 } 540 541 if branch == auth.branch { 542 return nil 543 } else { 544 return AuthError{ 545 http.StatusUnauthorized, 546 fmt.Sprintf("branch %s not in allowlist %v", branch, []string{auth.branch}), 547 } 548 } 549} 550 551// Gogs, Gitea, and Forgejo all support the same API here. 552func checkGogsRepositoryPushPermission(baseURL *url.URL, authorization string) error { 553 ownerAndRepo := strings.TrimSuffix(strings.TrimPrefix(baseURL.Path, "/"), ".git") 554 request, err := http.NewRequest("GET", baseURL.ResolveReference(&url.URL{ 555 Path: fmt.Sprintf("/api/v1/repos/%s", ownerAndRepo), 556 }).String(), nil) 557 if err != nil { 558 panic(err) // misconfiguration 559 } 560 request.Header.Set("Accept", "application/json") 561 request.Header.Set("Authorization", authorization) 562 563 httpClient := http.Client{Timeout: 5 * time.Second} 564 response, err := httpClient.Do(request) 565 if err != nil { 566 return AuthError{ 567 http.StatusServiceUnavailable, 568 fmt.Sprintf("cannot check repository permissions: %s", err), 569 } 570 } 571 defer response.Body.Close() 572 573 if response.StatusCode == http.StatusNotFound { 574 return AuthError{ 575 http.StatusNotFound, 576 fmt.Sprintf("no repository %s", ownerAndRepo), 577 } 578 } else if response.StatusCode != http.StatusOK { 579 return AuthError{ 580 http.StatusServiceUnavailable, 581 fmt.Sprintf( 582 "cannot check repository permissions: GET %s returned %s", 583 request.URL, 584 response.Status, 585 ), 586 } 587 } 588 decoder := json.NewDecoder(response.Body) 589 590 var repositoryInfo 
struct{ Permissions struct{ Push bool } } 591 if err = decoder.Decode(&repositoryInfo); err != nil { 592 return errors.Join(AuthError{ 593 http.StatusServiceUnavailable, 594 fmt.Sprintf( 595 "cannot check repository permissions: GET %s returned malformed JSON", 596 request.URL, 597 ), 598 }, err) 599 } 600 601 if !repositoryInfo.Permissions.Push { 602 return AuthError{ 603 http.StatusUnauthorized, 604 fmt.Sprintf("no push permission for %s", ownerAndRepo), 605 } 606 } 607 608 // this token authorizes pushing to the repo, yay! 609 return nil 610} 611 612func authorizeForgeWithToken(r *http.Request) (*Authorization, error) { 613 authorization := r.Header.Get("Forge-Authorization") 614 if authorization == "" { 615 return nil, AuthError{http.StatusUnauthorized, "missing Forge-Authorization header"} 616 } 617 618 host, err := GetHost(r) 619 if err != nil { 620 return nil, err 621 } 622 623 projectName, err := GetProjectName(r) 624 if err != nil { 625 return nil, err 626 } 627 628 var errs []error 629 for _, pattern := range wildcards { 630 if !pattern.Authorization { 631 continue 632 } 633 634 if userName, found := pattern.Matches(host); found { 635 repoURLs, branch := pattern.ApplyTemplate(userName, projectName) 636 for _, repoURL := range repoURLs { 637 parsedRepoURL, err := url.Parse(repoURL) 638 if err != nil { 639 panic(err) // misconfiguration 640 } 641 642 if err = checkGogsRepositoryPushPermission(parsedRepoURL, authorization); err != nil { 643 errs = append(errs, err) 644 continue 645 } 646 647 // This will actually be ignored by the caller of AuthorizeUpdateFromArchive, 648 // but we return this information as it makes sense to do contextually here. 649 return &Authorization{ 650 []string{repoURL}, 651 branch, 652 }, nil 653 } 654 } 655 } 656 657 errs = append([]error{ 658 AuthError{http.StatusUnauthorized, "not authorized by forge"}, 659 }, errs...) 660 return nil, joinErrors(errs...) 
661} 662 663func AuthorizeUpdateFromArchive(r *http.Request) (*Authorization, error) { 664 causes := []error{AuthError{http.StatusUnauthorized, "unauthorized"}} 665 666 if err := CheckForbiddenDomain(r); err != nil { 667 return nil, err 668 } 669 670 auth := authorizeInsecure(r) 671 if auth != nil { 672 return auth, nil 673 } 674 675 // Token authorization allows updating a site on a wildcard domain from an archive. 676 auth, err := authorizeForgeWithToken(r) 677 if err != nil && IsUnauthorized(err) { 678 causes = append(causes, err) 679 } else if err != nil { // bad request 680 return nil, err 681 } else { 682 logc.Printf(r.Context(), "auth: forge token: allow\n") 683 return auth, nil 684 } 685 686 if len(config.Limits.AllowedRepositoryURLPrefixes) > 0 { 687 causes = append(causes, AuthError{http.StatusUnauthorized, "DNS challenge not allowed"}) 688 } else { 689 // DNS challenge gives absolute authority. 690 auth, err = authorizeDNSChallenge(r) 691 if err != nil && IsUnauthorized(err) { 692 causes = append(causes, err) 693 } else if err != nil { // bad request 694 return nil, err 695 } else { 696 logc.Println(r.Context(), "auth: DNS challenge") 697 return auth, nil 698 } 699 } 700 701 return nil, joinErrors(causes...) 702} 703 704func CheckForbiddenDomain(r *http.Request) error { 705 host, err := GetHost(r) 706 if err != nil { 707 return err 708 } 709 710 host = strings.ToLower(host) 711 for _, reservedDomain := range config.Limits.ForbiddenDomains { 712 reservedDomain = strings.ToLower(reservedDomain) 713 if host == reservedDomain || strings.HasSuffix(host, fmt.Sprintf(".%s", reservedDomain)) { 714 return AuthError{http.StatusForbidden, "forbidden domain"} 715 } 716 } 717 718 return nil 719}