[mirror] Scalable static site server for Git forges (like GitHub Pages)
at main 11 kB view raw
1package git_pages 2 3import ( 4 "bytes" 5 "encoding/json" 6 "fmt" 7 "net/url" 8 "os" 9 "reflect" 10 "slices" 11 "strconv" 12 "strings" 13 "time" 14 15 "github.com/c2h5oh/datasize" 16 "github.com/creasty/defaults" 17 "github.com/pelletier/go-toml/v2" 18) 19 20// For an unknown reason, the standard `time.Duration` type doesn't implement the standard 21// `encoding.{TextMarshaler,TextUnmarshaler}` interfaces. 22type Duration time.Duration 23 24func (t Duration) String() string { 25 return fmt.Sprint(time.Duration(t)) 26} 27 28func (t *Duration) UnmarshalText(data []byte) (err error) { 29 u, err := time.ParseDuration(string(data)) 30 if err == nil { 31 *t = Duration(u) 32 } 33 return 34} 35 36func (t *Duration) MarshalText() ([]byte, error) { 37 return []byte(t.String()), nil 38} 39 40// For a known but upsetting reason, the standard `url.URL` type doesn't implement the standard 41// `encoding.{TextMarshaler,TextUnmarshaler}` interfaces. 42type URL struct { 43 url.URL 44} 45 46func (t *URL) String() string { 47 return fmt.Sprint(&t.URL) 48} 49 50func (t *URL) UnmarshalText(data []byte) (err error) { 51 u, err := url.Parse(string(data)) 52 if err == nil { 53 *t = URL{*u} 54 } 55 return 56} 57 58func (t *URL) MarshalText() ([]byte, error) { 59 return []byte(t.String()), nil 60} 61 62type Config struct { 63 Insecure bool `toml:"-" env:"insecure"` 64 Features []string `toml:"features"` 65 LogFormat string `toml:"log-format" default:"text"` 66 Server ServerConfig `toml:"server"` 67 Wildcard []WildcardConfig `toml:"wildcard"` 68 Fallback FallbackConfig `toml:"fallback"` 69 Storage StorageConfig `toml:"storage"` 70 Limits LimitsConfig `toml:"limits"` 71 Audit AuditConfig `toml:"audit"` 72 Observability ObservabilityConfig `toml:"observability"` 73} 74 75type ServerConfig struct { 76 Pages string `toml:"pages" default:"tcp/localhost:3000"` 77 Caddy string `toml:"caddy" default:"tcp/localhost:3001"` 78 Metrics string `toml:"metrics" default:"tcp/localhost:3002"` 79} 80 81type WildcardConfig struct { 82 Domain string `toml:"domain"` 83 CloneURL string `toml:"clone-url"` // URL template, not an exact URL 84 IndexRepos []string `toml:"index-repos" default:"[]"` 85 IndexRepoBranch string `toml:"index-repo-branch" default:"pages"` 86 Authorization string `toml:"authorization"` 87} 88 89type FallbackConfig struct { 90 ProxyTo *URL `toml:"proxy-to"` 91 Insecure bool `toml:"insecure"` 92} 93 94type CacheConfig struct { 95 MaxSize datasize.ByteSize `toml:"max-size"` 96 MaxAge Duration `toml:"max-age"` 97 MaxStale Duration `toml:"max-stale"` 98} 99 100type StorageConfig struct { 101 Type string `toml:"type" default:"fs"` 102 FS FSConfig `toml:"fs" default:"{\"Root\":\"./data\"}"` 103 S3 S3Config `toml:"s3"` 104} 105 106type FSConfig struct { 107 Root string `toml:"root"` 108} 109 110type S3Config struct { 111 Endpoint string `toml:"endpoint"` 112 Insecure bool `toml:"insecure"` 113 AccessKeyID string `toml:"access-key-id"` 114 SecretAccessKey string `toml:"secret-access-key"` 115 Region string `toml:"region"` 116 Bucket string `toml:"bucket"` 117 BlobCache CacheConfig `toml:"blob-cache" default:"{\"MaxSize\":\"256MB\"}"` 118 SiteCache CacheConfig `toml:"site-cache" default:"{\"MaxAge\":\"60s\",\"MaxStale\":\"1h\",\"MaxSize\":\"16MB\"}"` 119} 120 121type LimitsConfig struct { 122 // Maximum size of a single published site. Also used to limit the size of archive 123 // uploads and other similar overconsumption conditions. 124 MaxSiteSize datasize.ByteSize `toml:"max-site-size" default:"128M"` 125 // Maximum size of a single site manifest, computed over its binary Protobuf 126 // serialization. 127 MaxManifestSize datasize.ByteSize `toml:"max-manifest-size" default:"1M"` 128 // Maximum size of a file that will still be inlined into the site manifest. 129 MaxInlineFileSize datasize.ByteSize `toml:"max-inline-file-size" default:"256B"` 130 // Maximum size of a Git object that will be cached in memory during Git operations. 131 GitLargeObjectThreshold datasize.ByteSize `toml:"git-large-object-threshold" default:"1M"` 132 // Maximum number of symbolic link traversals before the path is considered unreachable. 133 MaxSymlinkDepth uint `toml:"max-symlink-depth" default:"16"` 134 // Maximum time that an update operation (PUT or POST request) could take before being 135 // interrupted. 136 UpdateTimeout Duration `toml:"update-timeout" default:"60s"` 137 // Soft limit on Go heap size, expressed as a fraction of total available RAM. 138 MaxHeapSizeRatio float64 `toml:"max-heap-size-ratio" default:"0.5"` 139 // List of domains unconditionally forbidden for uploads. 140 ForbiddenDomains []string `toml:"forbidden-domains" default:"[]"` 141 // List of allowed repository URL prefixes. Setting this option prohibits uploading archives. 142 AllowedRepositoryURLPrefixes []string `toml:"allowed-repository-url-prefixes" default:"[]"` 143 // List of allowed custom headers. Header name must be in the MIME canonical form, 144 // e.g. `Foo-Bar`. Setting this option permits including this custom header in `_headers`, 145 // unless it is fundamentally unsafe. 146 AllowedCustomHeaders []string `toml:"allowed-custom-headers" default:"[\"X-Clacks-Overhead\"]"` 147} 148 149type AuditConfig struct { 150 // Globally unique machine identifier (0 to 63 inclusive). 151 NodeID int `toml:"node-id"` 152 // Whether audit reports should be stored whenever an audit event occurs. 153 Collect bool `toml:"collect"` 154 // If not empty, includes the principal's IP address in audit reports, with the value specifying 155 // the source of the IP address. If the value is "X-Forwarded-For", the last item of the 156 // corresponding header field (assumed to be comma-separated) is used. If the value is 157 // "RemoteAddr", the connecting host's address is used. Any other value is disallowed. 158 IncludeIPs string `toml:"include-ip"` 159 // Endpoint to notify with a `GET /<notify-url>?<id>` whenever an audit event occurs. 160 NotifyURL *URL `toml:"notify-url"` 161} 162 163type ObservabilityConfig struct { 164 // Minimum duration for an HTTP request transaction to be unconditionally sampled. 165 SlowResponseThreshold Duration `toml:"slow-response-threshold" default:"500ms"` 166} 167 168func (config *Config) TOML() string { 169 result, err := toml.Marshal(config) 170 if err != nil { 171 panic(err) 172 } 173 return string(result) 174} 175 176func (config *Config) Feature(name string) bool { 177 return slices.Contains(config.Features, name) 178} 179 180type walkConfigState struct { 181 config reflect.Value 182 scopeType reflect.Type 183 index []int 184 segments []string 185} 186 187func walkConfigScope(scopeState walkConfigState, onKey func(string, reflect.Value) error) (err error) { 188 for _, field := range reflect.VisibleFields(scopeState.scopeType) { 189 fieldState := walkConfigState{config: scopeState.config} 190 fieldState.scopeType = field.Type 191 fieldState.index = append(scopeState.index, field.Index...) 192 var tagValue, ok = "", false 193 if tagValue, ok = field.Tag.Lookup("env"); !ok { 194 if tagValue, ok = field.Tag.Lookup("toml"); !ok { 195 continue // implicit skip 196 } 197 } else if tagValue == "-" { 198 continue // explicit skip 199 } 200 fieldSegment := strings.ReplaceAll(strings.ToUpper(tagValue), "-", "_") 201 fieldState.segments = append(scopeState.segments, fieldSegment) 202 switch field.Type.Kind() { 203 case reflect.Struct: 204 err = walkConfigScope(fieldState, onKey) 205 default: 206 err = onKey( 207 strings.Join(fieldState.segments, "_"), 208 scopeState.config.FieldByIndex(fieldState.index), 209 ) 210 } 211 if err != nil { 212 return 213 } 214 } 215 return 216} 217 218func walkConfig(config *Config, onKey func(string, reflect.Value) error) error { 219 state := walkConfigState{ 220 config: reflect.ValueOf(config).Elem(), 221 scopeType: reflect.TypeOf(config).Elem(), 222 index: []int{}, 223 segments: []string{"PAGES"}, 224 } 225 return walkConfigScope(state, onKey) 226} 227 228func setConfigValue(reflValue reflect.Value, repr string) (err error) { 229 valueAny := reflValue.Interface() 230 switch valueCast := valueAny.(type) { 231 case string: 232 reflValue.SetString(repr) 233 case []string: 234 reflValue.Set(reflect.ValueOf(strings.Split(repr, ","))) 235 case bool: 236 if valueCast, err = strconv.ParseBool(repr); err == nil { 237 reflValue.SetBool(valueCast) 238 } 239 case int: 240 var parsed int64 241 if parsed, err = strconv.ParseInt(repr, 10, strconv.IntSize); err == nil { 242 reflValue.SetInt(parsed) 243 } 244 case uint: 245 var parsed uint64 246 if parsed, err = strconv.ParseUint(repr, 10, strconv.IntSize); err == nil { 247 reflValue.SetUint(parsed) 248 } 249 case float64: 250 if valueCast, err = strconv.ParseFloat(repr, 64); err == nil { 251 reflValue.SetFloat(valueCast) 252 } 253 case datasize.ByteSize: 254 if valueCast, err = datasize.ParseString(repr); err == nil { 255 reflValue.Set(reflect.ValueOf(valueCast)) 256 } 257 case Duration: 258 var parsed time.Duration 259 if parsed, err = time.ParseDuration(repr); err == nil { 260 reflValue.Set(reflect.ValueOf(Duration(parsed))) 261 } 262 case *URL: 263 if repr == "" { 264 reflValue.Set(reflect.ValueOf(nil)) 265 } else { 266 var parsed *url.URL 267 if parsed, err = url.Parse(repr); err == nil { 268 reflValue.Set(reflect.ValueOf(&URL{*parsed})) 269 } 270 } 271 case []WildcardConfig: 272 var parsed []*WildcardConfig 273 decoder := json.NewDecoder(bytes.NewReader([]byte(repr))) 274 decoder.DisallowUnknownFields() 275 if err = decoder.Decode(&parsed); err == nil { 276 var assigned []WildcardConfig 277 for _, wildcard := range parsed { 278 defaults.MustSet(wildcard) 279 assigned = append(assigned, *wildcard) 280 } 281 reflValue.Set(reflect.ValueOf(assigned)) 282 } 283 default: 284 panic("unhandled config value type") 285 } 286 return err 287} 288 289func PrintConfigEnvVars() { 290 config := Config{} 291 defaults.MustSet(&config) 292 293 walkConfig(&config, func(envName string, reflValue reflect.Value) (err error) { 294 value := reflValue.Interface() 295 reprBefore := fmt.Sprint(value) 296 fmt.Printf("%s %T = %q\n", envName, value, reprBefore) 297 // make sure that the value, at least, roundtrips 298 setConfigValue(reflValue, reprBefore) 299 reprAfter := fmt.Sprint(value) 300 if reprBefore != reprAfter { 301 panic("failed to roundtrip config value") 302 } 303 return 304 }) 305} 306 307func Configure(tomlPath string) (config *Config, err error) { 308 // start with an all-default configuration 309 config = new(Config) 310 defaults.MustSet(config) 311 312 // inject values from `config.toml` 313 if tomlPath != "" { 314 var file *os.File 315 file, err = os.Open(tomlPath) 316 if err != nil { 317 return 318 } 319 defer file.Close() 320 321 decoder := toml.NewDecoder(file) 322 decoder.DisallowUnknownFields() 323 decoder.EnableUnmarshalerInterface() 324 if err = decoder.Decode(&config); err != nil { 325 return 326 } 327 } 328 329 // inject values from the environment, overriding everything else 330 err = walkConfig(config, func(envName string, reflValue reflect.Value) error { 331 if envValue, found := os.LookupEnv(envName); found { 332 return setConfigValue(reflValue, envValue) 333 } 334 return nil 335 }) 336 337 // defaults for wildcards aren't set by `defaults.MustSet` call above because the structs 338 // for them haven't been created yet 339 for i := range config.Wildcard { 340 defaults.MustSet(&config.Wildcard[i]) 341 } 342 343 return 344}