// [mirror] Scalable static site server for Git forges (like GitHub Pages)
// Source snapshot at v0.3.1 (11 kB).

1package git_pages 2 3import ( 4 "bytes" 5 "encoding/json" 6 "fmt" 7 "net/url" 8 "os" 9 "reflect" 10 "slices" 11 "strconv" 12 "strings" 13 "time" 14 15 "github.com/c2h5oh/datasize" 16 "github.com/creasty/defaults" 17 "github.com/pelletier/go-toml/v2" 18) 19 20// For an unknown reason, the standard `time.Duration` type doesn't implement the standard 21// `encoding.{TextMarshaler,TextUnmarshaler}` interfaces. 22type Duration time.Duration 23 24func (t Duration) String() string { 25 return fmt.Sprint(time.Duration(t)) 26} 27 28func (t *Duration) UnmarshalText(data []byte) (err error) { 29 u, err := time.ParseDuration(string(data)) 30 if err == nil { 31 *t = Duration(u) 32 } 33 return 34} 35 36func (t *Duration) MarshalText() ([]byte, error) { 37 return []byte(t.String()), nil 38} 39 40// For a known but upsetting reason, the standard `url.URL` type doesn't implement the standard 41// `encoding.{TextMarshaler,TextUnmarshaler}` interfaces. 42type URL struct { 43 url.URL 44} 45 46func (t *URL) String() string { 47 return fmt.Sprint(&t.URL) 48} 49 50func (t *URL) UnmarshalText(data []byte) (err error) { 51 u, err := url.Parse(string(data)) 52 if err == nil { 53 *t = URL{*u} 54 } 55 return 56} 57 58func (t *URL) MarshalText() ([]byte, error) { 59 return []byte(t.String()), nil 60} 61 62type Config struct { 63 Insecure bool `toml:"-" env:"insecure"` 64 Features []string `toml:"features"` 65 LogFormat string `toml:"log-format" default:"text"` 66 LogLevel string `toml:"log-level" default:"info"` 67 Server ServerConfig `toml:"server"` 68 Wildcard []WildcardConfig `toml:"wildcard"` 69 Fallback FallbackConfig `toml:"fallback"` 70 Storage StorageConfig `toml:"storage"` 71 Limits LimitsConfig `toml:"limits"` 72 Audit AuditConfig `toml:"audit"` 73 Observability ObservabilityConfig `toml:"observability"` 74} 75 76type ServerConfig struct { 77 Pages string `toml:"pages" default:"tcp/:3000"` 78 Caddy string `toml:"caddy" default:"tcp/:3001"` 79 Metrics string `toml:"metrics" 
default:"tcp/:3002"` 80} 81 82type WildcardConfig struct { 83 Domain string `toml:"domain"` 84 CloneURL string `toml:"clone-url"` // URL template, not an exact URL 85 IndexRepos []string `toml:"index-repos" default:"[]"` 86 IndexRepoBranch string `toml:"index-repo-branch" default:"pages"` 87 Authorization string `toml:"authorization"` 88} 89 90type FallbackConfig struct { 91 ProxyTo *URL `toml:"proxy-to"` 92 Insecure bool `toml:"insecure"` 93} 94 95type CacheConfig struct { 96 MaxSize datasize.ByteSize `toml:"max-size"` 97 MaxAge Duration `toml:"max-age"` 98 MaxStale Duration `toml:"max-stale"` 99} 100 101type StorageConfig struct { 102 Type string `toml:"type" default:"fs"` 103 FS FSConfig `toml:"fs" default:"{\"Root\":\"./data\"}"` 104 S3 S3Config `toml:"s3"` 105} 106 107type FSConfig struct { 108 Root string `toml:"root"` 109} 110 111type S3Config struct { 112 Endpoint string `toml:"endpoint"` 113 Insecure bool `toml:"insecure"` 114 AccessKeyID string `toml:"access-key-id"` 115 SecretAccessKey string `toml:"secret-access-key"` 116 Region string `toml:"region"` 117 Bucket string `toml:"bucket"` 118 BlobCache CacheConfig `toml:"blob-cache" default:"{\"MaxSize\":\"256MB\"}"` 119 SiteCache CacheConfig `toml:"site-cache" default:"{\"MaxAge\":\"60s\",\"MaxStale\":\"1h\",\"MaxSize\":\"16MB\"}"` 120} 121 122type LimitsConfig struct { 123 // Maximum size of a single published site. Also used to limit the size of archive 124 // uploads and other similar overconsumption conditions. 125 MaxSiteSize datasize.ByteSize `toml:"max-site-size" default:"128M"` 126 // Maximum size of a single site manifest, computed over its binary Protobuf 127 // serialization. 128 MaxManifestSize datasize.ByteSize `toml:"max-manifest-size" default:"1M"` 129 // Maximum size of a file that will still be inlined into the site manifest. 
130 MaxInlineFileSize datasize.ByteSize `toml:"max-inline-file-size" default:"256B"` 131 // Maximum size of a Git object that will be cached in memory during Git operations. 132 GitLargeObjectThreshold datasize.ByteSize `toml:"git-large-object-threshold" default:"1M"` 133 // Maximum number of symbolic link traversals before the path is considered unreachable. 134 MaxSymlinkDepth uint `toml:"max-symlink-depth" default:"16"` 135 // Maximum time that an update operation (PUT or POST request) could take before being 136 // interrupted. 137 UpdateTimeout Duration `toml:"update-timeout" default:"60s"` 138 // Soft limit on Go heap size, expressed as a fraction of total available RAM. 139 MaxHeapSizeRatio float64 `toml:"max-heap-size-ratio" default:"0.5"` 140 // List of domains unconditionally forbidden for uploads. 141 ForbiddenDomains []string `toml:"forbidden-domains" default:"[]"` 142 // List of allowed repository URL prefixes. Setting this option prohibits uploading archives. 143 AllowedRepositoryURLPrefixes []string `toml:"allowed-repository-url-prefixes"` 144 // List of allowed custom headers. Header name must be in the MIME canonical form, 145 // e.g. `Foo-Bar`. Setting this option permits including this custom header in `_headers`, 146 // unless it is fundamentally unsafe. 147 AllowedCustomHeaders []string `toml:"allowed-custom-headers" default:"[\"X-Clacks-Overhead\"]"` 148} 149 150type AuditConfig struct { 151 // Globally unique machine identifier (0 to 63 inclusive). 152 NodeID int `toml:"node-id"` 153 // Whether audit reports should be stored whenever an audit event occurs. 154 Collect bool `toml:"collect"` 155 // If not empty, includes the principal's IP address in audit reports, with the value specifying 156 // the source of the IP address. If the value is "X-Forwarded-For", the last item of the 157 // corresponding header field (assumed to be comma-separated) is used. If the value is 158 // "RemoteAddr", the connecting host's address is used. 
Any other value is disallowed. 159 IncludeIPs string `toml:"include-ip"` 160 // Endpoint to notify with a `GET /<notify-url>?<id>` whenever an audit event occurs. 161 NotifyURL *URL `toml:"notify-url"` 162} 163 164type ObservabilityConfig struct { 165 // Minimum duration for an HTTP request transaction to be unconditionally sampled. 166 SlowResponseThreshold Duration `toml:"slow-response-threshold" default:"500ms"` 167} 168 169func (config *Config) TOML() string { 170 result, err := toml.Marshal(config) 171 if err != nil { 172 panic(err) 173 } 174 return string(result) 175} 176 177func (config *Config) Feature(name string) bool { 178 return slices.Contains(config.Features, name) 179} 180 181type walkConfigState struct { 182 config reflect.Value 183 scopeType reflect.Type 184 index []int 185 segments []string 186} 187 188func walkConfigScope(scopeState walkConfigState, onKey func(string, reflect.Value) error) (err error) { 189 for _, field := range reflect.VisibleFields(scopeState.scopeType) { 190 fieldState := walkConfigState{config: scopeState.config} 191 fieldState.scopeType = field.Type 192 fieldState.index = append(scopeState.index, field.Index...) 
193 var tagValue, ok = "", false 194 if tagValue, ok = field.Tag.Lookup("env"); !ok { 195 if tagValue, ok = field.Tag.Lookup("toml"); !ok { 196 continue // implicit skip 197 } 198 } else if tagValue == "-" { 199 continue // explicit skip 200 } 201 fieldSegment := strings.ReplaceAll(strings.ToUpper(tagValue), "-", "_") 202 fieldState.segments = append(scopeState.segments, fieldSegment) 203 switch field.Type.Kind() { 204 case reflect.Struct: 205 err = walkConfigScope(fieldState, onKey) 206 default: 207 err = onKey( 208 strings.Join(fieldState.segments, "_"), 209 scopeState.config.FieldByIndex(fieldState.index), 210 ) 211 } 212 if err != nil { 213 return 214 } 215 } 216 return 217} 218 219func walkConfig(config *Config, onKey func(string, reflect.Value) error) error { 220 state := walkConfigState{ 221 config: reflect.ValueOf(config).Elem(), 222 scopeType: reflect.TypeOf(config).Elem(), 223 index: []int{}, 224 segments: []string{"PAGES"}, 225 } 226 return walkConfigScope(state, onKey) 227} 228 229func setConfigValue(reflValue reflect.Value, repr string) (err error) { 230 valueAny := reflValue.Interface() 231 switch valueCast := valueAny.(type) { 232 case string: 233 reflValue.SetString(repr) 234 case []string: 235 reflValue.Set(reflect.ValueOf(strings.Split(repr, ","))) 236 case bool: 237 if valueCast, err = strconv.ParseBool(repr); err == nil { 238 reflValue.SetBool(valueCast) 239 } 240 case int: 241 var parsed int64 242 if parsed, err = strconv.ParseInt(repr, 10, strconv.IntSize); err == nil { 243 reflValue.SetInt(parsed) 244 } 245 case uint: 246 var parsed uint64 247 if parsed, err = strconv.ParseUint(repr, 10, strconv.IntSize); err == nil { 248 reflValue.SetUint(parsed) 249 } 250 case float64: 251 if valueCast, err = strconv.ParseFloat(repr, 64); err == nil { 252 reflValue.SetFloat(valueCast) 253 } 254 case datasize.ByteSize: 255 if valueCast, err = datasize.ParseString(repr); err == nil { 256 reflValue.Set(reflect.ValueOf(valueCast)) 257 } 258 case Duration: 259 
var parsed time.Duration 260 if parsed, err = time.ParseDuration(repr); err == nil { 261 reflValue.Set(reflect.ValueOf(Duration(parsed))) 262 } 263 case *URL: 264 if repr == "" { 265 reflValue.Set(reflect.ValueOf(nil)) 266 } else { 267 var parsed *url.URL 268 if parsed, err = url.Parse(repr); err == nil { 269 reflValue.Set(reflect.ValueOf(&URL{*parsed})) 270 } 271 } 272 case []WildcardConfig: 273 var parsed []*WildcardConfig 274 decoder := json.NewDecoder(bytes.NewReader([]byte(repr))) 275 decoder.DisallowUnknownFields() 276 if err = decoder.Decode(&parsed); err == nil { 277 var assigned []WildcardConfig 278 for _, wildcard := range parsed { 279 defaults.MustSet(wildcard) 280 assigned = append(assigned, *wildcard) 281 } 282 reflValue.Set(reflect.ValueOf(assigned)) 283 } 284 default: 285 panic("unhandled config value type") 286 } 287 return err 288} 289 290func PrintConfigEnvVars() { 291 config := Config{} 292 defaults.MustSet(&config) 293 294 walkConfig(&config, func(envName string, reflValue reflect.Value) (err error) { 295 value := reflValue.Interface() 296 reprBefore := fmt.Sprint(value) 297 fmt.Printf("%s %T = %q\n", envName, value, reprBefore) 298 // make sure that the value, at least, roundtrips 299 setConfigValue(reflValue, reprBefore) 300 reprAfter := fmt.Sprint(value) 301 if reprBefore != reprAfter { 302 panic("failed to roundtrip config value") 303 } 304 return 305 }) 306} 307 308func Configure(tomlPath string) (config *Config, err error) { 309 // start with an all-default configuration 310 config = new(Config) 311 defaults.MustSet(config) 312 313 // inject values from `config.toml` 314 if tomlPath != "" { 315 var file *os.File 316 file, err = os.Open(tomlPath) 317 if err != nil { 318 return 319 } 320 defer file.Close() 321 322 decoder := toml.NewDecoder(file) 323 decoder.DisallowUnknownFields() 324 decoder.EnableUnmarshalerInterface() 325 if err = decoder.Decode(&config); err != nil { 326 return 327 } 328 } 329 330 // inject values from the environment, 
overriding everything else 331 err = walkConfig(config, func(envName string, reflValue reflect.Value) error { 332 if envValue, found := os.LookupEnv(envName); found { 333 return setConfigValue(reflValue, envValue) 334 } 335 return nil 336 }) 337 338 // defaults for wildcards aren't set by `defaults.MustSet` call above because the structs 339 // for them haven't been created yet 340 for i := range config.Wildcard { 341 defaults.MustSet(&config.Wildcard[i]) 342 } 343 344 return 345}