[mirror] Scalable static site server for Git forges (like GitHub Pages)
1package git_pages
2
3import (
4 "bytes"
5 "encoding/json"
6 "fmt"
7 "net/url"
8 "os"
9 "reflect"
10 "slices"
11 "strconv"
12 "strings"
13 "time"
14
15 "github.com/c2h5oh/datasize"
16 "github.com/creasty/defaults"
17 "github.com/pelletier/go-toml/v2"
18)
19
// Duration wraps the standard `time.Duration`, which does not itself implement the standard
// `encoding.{TextMarshaler,TextUnmarshaler}` interfaces, and adds them. The textual form is
// the one understood by `time.ParseDuration` and produced by `time.Duration.String`.
type Duration time.Duration

// String formats the duration the same way the underlying `time.Duration` does.
func (t Duration) String() string {
	return fmt.Sprint(time.Duration(t))
}

// UnmarshalText parses data with `time.ParseDuration` and stores the result in t.
// When parsing fails the error is returned and t keeps its previous value.
func (t *Duration) UnmarshalText(data []byte) (err error) {
	parsed, err := time.ParseDuration(string(data))
	if err != nil {
		return err
	}
	*t = Duration(parsed)
	return nil
}

// MarshalText returns the same text as String; it never fails.
func (t *Duration) MarshalText() ([]byte, error) {
	text := t.String()
	return []byte(text), nil
}
39
// URL embeds the standard `url.URL`, which does not itself implement the standard
// `encoding.{TextMarshaler,TextUnmarshaler}` interfaces, and adds them.
type URL struct {
	url.URL
}

// String returns the textual form of the embedded `url.URL`.
func (t *URL) String() string {
	return fmt.Sprint(&t.URL)
}

// UnmarshalText parses data with `url.Parse` and replaces the embedded URL with the result.
// When parsing fails the error is returned and t keeps its previous value.
func (t *URL) UnmarshalText(data []byte) (err error) {
	parsed, err := url.Parse(string(data))
	if err != nil {
		return err
	}
	t.URL = *parsed
	return nil
}

// MarshalText returns the same text as String; it never fails.
func (t *URL) MarshalText() ([]byte, error) {
	text := t.String()
	return []byte(text), nil
}
61
62type Config struct {
63 Insecure bool `toml:"-" env:"insecure"`
64 Features []string `toml:"features"`
65 LogFormat string `toml:"log-format" default:"text"`
66 LogLevel string `toml:"log-level" default:"info"`
67 Server ServerConfig `toml:"server"`
68 Wildcard []WildcardConfig `toml:"wildcard"`
69 Fallback FallbackConfig `toml:"fallback"`
70 Storage StorageConfig `toml:"storage"`
71 Limits LimitsConfig `toml:"limits"`
72 Audit AuditConfig `toml:"audit"`
73 Observability ObservabilityConfig `toml:"observability"`
74}
75
// ServerConfig holds one endpoint string per server, each defaulting to a distinct
// `tcp/:<port>` value (3000, 3001 and 3002 respectively).
type ServerConfig struct {
	Pages   string `toml:"pages" default:"tcp/:3000"`
	Caddy   string `toml:"caddy" default:"tcp/:3001"`
	Metrics string `toml:"metrics" default:"tcp/:3002"`
}
81
// WildcardConfig is one entry of the `wildcard` list. Because entries are created while the
// configuration is being read, their defaults (an empty `index-repos` list and the `pages`
// branch) are applied separately from the defaults of the rest of the tree.
type WildcardConfig struct {
	Domain          string   `toml:"domain"`
	CloneURL        string   `toml:"clone-url"` // URL template, not an exact URL
	IndexRepos      []string `toml:"index-repos" default:"[]"`
	IndexRepoBranch string   `toml:"index-repo-branch" default:"pages"`
	Authorization   string   `toml:"authorization"`
}
89
90type FallbackConfig struct {
91 ProxyTo *URL `toml:"proxy-to"`
92 Insecure bool `toml:"insecure"`
93}
94
95type CacheConfig struct {
96 MaxSize datasize.ByteSize `toml:"max-size"`
97 MaxAge Duration `toml:"max-age"`
98 MaxStale Duration `toml:"max-stale"`
99}
100
101type StorageConfig struct {
102 Type string `toml:"type" default:"fs"`
103 FS FSConfig `toml:"fs" default:"{\"Root\":\"./data\"}"`
104 S3 S3Config `toml:"s3"`
105}
106
// FSConfig is the `storage.fs` section; its only setting is the root path.
type FSConfig struct {
	Root string `toml:"root"`
}
110
111type S3Config struct {
112 Endpoint string `toml:"endpoint"`
113 Insecure bool `toml:"insecure"`
114 AccessKeyID string `toml:"access-key-id"`
115 SecretAccessKey string `toml:"secret-access-key"`
116 Region string `toml:"region"`
117 Bucket string `toml:"bucket"`
118 BlobCache CacheConfig `toml:"blob-cache" default:"{\"MaxSize\":\"256MB\"}"`
119 SiteCache CacheConfig `toml:"site-cache" default:"{\"MaxAge\":\"60s\",\"MaxStale\":\"1h\",\"MaxSize\":\"16MB\"}"`
120}
121
122type LimitsConfig struct {
123 // Maximum size of a single published site. Also used to limit the size of archive
124 // uploads and other similar overconsumption conditions.
125 MaxSiteSize datasize.ByteSize `toml:"max-site-size" default:"128M"`
126 // Maximum size of a single site manifest, computed over its binary Protobuf
127 // serialization.
128 MaxManifestSize datasize.ByteSize `toml:"max-manifest-size" default:"1M"`
129 // Maximum size of a file that will still be inlined into the site manifest.
130 MaxInlineFileSize datasize.ByteSize `toml:"max-inline-file-size" default:"256B"`
131 // Maximum size of a Git object that will be cached in memory during Git operations.
132 GitLargeObjectThreshold datasize.ByteSize `toml:"git-large-object-threshold" default:"1M"`
133 // Maximum number of symbolic link traversals before the path is considered unreachable.
134 MaxSymlinkDepth uint `toml:"max-symlink-depth" default:"16"`
135 // Maximum time that an update operation (PUT or POST request) could take before being
136 // interrupted.
137 UpdateTimeout Duration `toml:"update-timeout" default:"60s"`
138 // Soft limit on Go heap size, expressed as a fraction of total available RAM.
139 MaxHeapSizeRatio float64 `toml:"max-heap-size-ratio" default:"0.5"`
140 // List of domains unconditionally forbidden for uploads.
141 ForbiddenDomains []string `toml:"forbidden-domains" default:"[]"`
142 // List of allowed repository URL prefixes. Setting this option prohibits uploading archives.
143 AllowedRepositoryURLPrefixes []string `toml:"allowed-repository-url-prefixes"`
144 // List of allowed custom headers. Header name must be in the MIME canonical form,
145 // e.g. `Foo-Bar`. Setting this option permits including this custom header in `_headers`,
146 // unless it is fundamentally unsafe.
147 AllowedCustomHeaders []string `toml:"allowed-custom-headers" default:"[\"X-Clacks-Overhead\"]"`
148}
149
150type AuditConfig struct {
151 // Globally unique machine identifier (0 to 63 inclusive).
152 NodeID int `toml:"node-id"`
153 // Whether audit reports should be stored whenever an audit event occurs.
154 Collect bool `toml:"collect"`
155 // If not empty, includes the principal's IP address in audit reports, with the value specifying
156 // the source of the IP address. If the value is "X-Forwarded-For", the last item of the
157 // corresponding header field (assumed to be comma-separated) is used. If the value is
158 // "RemoteAddr", the connecting host's address is used. Any other value is disallowed.
159 IncludeIPs string `toml:"include-ip"`
160 // Endpoint to notify with a `GET /<notify-url>?<id>` whenever an audit event occurs.
161 NotifyURL *URL `toml:"notify-url"`
162}
163
164type ObservabilityConfig struct {
165 // Minimum duration for an HTTP request transaction to be unconditionally sampled.
166 SlowResponseThreshold Duration `toml:"slow-response-threshold" default:"500ms"`
167}
168
169func (config *Config) TOML() string {
170 result, err := toml.Marshal(config)
171 if err != nil {
172 panic(err)
173 }
174 return string(result)
175}
176
177func (config *Config) Feature(name string) bool {
178 return slices.Contains(config.Features, name)
179}
180
// walkConfigState describes one scope (a struct type located somewhere inside the root
// configuration value) while it is being enumerated by walkConfigScope.
type walkConfigState struct {
	config    reflect.Value // root configuration struct; leaf values are resolved against it
	scopeType reflect.Type  // struct type whose fields are enumerated in this scope
	index     []int         // field index path leading from the root to this scope
	segments  []string      // key segments accumulated from the root to this scope
}

// walkConfigScope enumerates the fields of scopeState.scopeType, recursing into fields of
// struct kind and calling onKey for every other field with:
//   - the key: all accumulated segments joined with "_";
//   - the field's value, resolved against scopeState.config by its index path.
//
// A field's segment comes from its `env` tag or, when that tag is absent, from its `toml`
// tag; it is upper-cased and has "-" replaced by "_". Tag options after a comma are not part
// of the name, and an empty name falls back to the Go field name. Fields are skipped when
// they are unexported, when they carry neither tag, or when the selected tag is exactly "-".
//
// Enumeration stops at the first error returned by onKey, and that error is returned.
func walkConfigScope(scopeState walkConfigState, onKey func(string, reflect.Value) error) (err error) {
	for _, field := range reflect.VisibleFields(scopeState.scopeType) {
		if !field.IsExported() {
			continue // values of unexported fields can be neither read via Interface nor set
		}
		tagValue, ok := field.Tag.Lookup("env")
		if !ok {
			tagValue, ok = field.Tag.Lookup("toml")
		}
		if !ok {
			continue // implicit skip: the field carries neither tag
		}
		if tagValue == "-" {
			continue // explicit skip, whichever of the two tags it came from
		}
		name, _, _ := strings.Cut(tagValue, ",")
		if name == "" {
			name = field.Name
		}
		fieldSegment := strings.ReplaceAll(strings.ToUpper(name), "-", "_")

		// Cap the parent's slices at their length so that `append` always copies; sibling
		// fields then never share (and overwrite) one backing array.
		parentIndex, parentSegments := scopeState.index, scopeState.segments
		fieldState := walkConfigState{
			config:    scopeState.config,
			scopeType: field.Type,
			index:     append(parentIndex[:len(parentIndex):len(parentIndex)], field.Index...),
			segments:  append(parentSegments[:len(parentSegments):len(parentSegments)], fieldSegment),
		}

		if field.Type.Kind() == reflect.Struct {
			err = walkConfigScope(fieldState, onKey)
		} else {
			err = onKey(
				strings.Join(fieldState.segments, "_"),
				scopeState.config.FieldByIndex(fieldState.index),
			)
		}
		if err != nil {
			return err
		}
	}
	return nil
}
218
219func walkConfig(config *Config, onKey func(string, reflect.Value) error) error {
220 state := walkConfigState{
221 config: reflect.ValueOf(config).Elem(),
222 scopeType: reflect.TypeOf(config).Elem(),
223 index: []int{},
224 segments: []string{"PAGES"},
225 }
226 return walkConfigScope(state, onKey)
227}
228
229func setConfigValue(reflValue reflect.Value, repr string) (err error) {
230 valueAny := reflValue.Interface()
231 switch valueCast := valueAny.(type) {
232 case string:
233 reflValue.SetString(repr)
234 case []string:
235 reflValue.Set(reflect.ValueOf(strings.Split(repr, ",")))
236 case bool:
237 if valueCast, err = strconv.ParseBool(repr); err == nil {
238 reflValue.SetBool(valueCast)
239 }
240 case int:
241 var parsed int64
242 if parsed, err = strconv.ParseInt(repr, 10, strconv.IntSize); err == nil {
243 reflValue.SetInt(parsed)
244 }
245 case uint:
246 var parsed uint64
247 if parsed, err = strconv.ParseUint(repr, 10, strconv.IntSize); err == nil {
248 reflValue.SetUint(parsed)
249 }
250 case float64:
251 if valueCast, err = strconv.ParseFloat(repr, 64); err == nil {
252 reflValue.SetFloat(valueCast)
253 }
254 case datasize.ByteSize:
255 if valueCast, err = datasize.ParseString(repr); err == nil {
256 reflValue.Set(reflect.ValueOf(valueCast))
257 }
258 case Duration:
259 var parsed time.Duration
260 if parsed, err = time.ParseDuration(repr); err == nil {
261 reflValue.Set(reflect.ValueOf(Duration(parsed)))
262 }
263 case *URL:
264 if repr == "" {
265 reflValue.Set(reflect.ValueOf(nil))
266 } else {
267 var parsed *url.URL
268 if parsed, err = url.Parse(repr); err == nil {
269 reflValue.Set(reflect.ValueOf(&URL{*parsed}))
270 }
271 }
272 case []WildcardConfig:
273 var parsed []*WildcardConfig
274 decoder := json.NewDecoder(bytes.NewReader([]byte(repr)))
275 decoder.DisallowUnknownFields()
276 if err = decoder.Decode(&parsed); err == nil {
277 var assigned []WildcardConfig
278 for _, wildcard := range parsed {
279 defaults.MustSet(wildcard)
280 assigned = append(assigned, *wildcard)
281 }
282 reflValue.Set(reflect.ValueOf(assigned))
283 }
284 default:
285 panic("unhandled config value type")
286 }
287 return err
288}
289
290func PrintConfigEnvVars() {
291 config := Config{}
292 defaults.MustSet(&config)
293
294 walkConfig(&config, func(envName string, reflValue reflect.Value) (err error) {
295 value := reflValue.Interface()
296 reprBefore := fmt.Sprint(value)
297 fmt.Printf("%s %T = %q\n", envName, value, reprBefore)
298 // make sure that the value, at least, roundtrips
299 setConfigValue(reflValue, reprBefore)
300 reprAfter := fmt.Sprint(value)
301 if reprBefore != reprAfter {
302 panic("failed to roundtrip config value")
303 }
304 return
305 })
306}
307
308func Configure(tomlPath string) (config *Config, err error) {
309 // start with an all-default configuration
310 config = new(Config)
311 defaults.MustSet(config)
312
313 // inject values from `config.toml`
314 if tomlPath != "" {
315 var file *os.File
316 file, err = os.Open(tomlPath)
317 if err != nil {
318 return
319 }
320 defer file.Close()
321
322 decoder := toml.NewDecoder(file)
323 decoder.DisallowUnknownFields()
324 decoder.EnableUnmarshalerInterface()
325 if err = decoder.Decode(&config); err != nil {
326 return
327 }
328 }
329
330 // inject values from the environment, overriding everything else
331 err = walkConfig(config, func(envName string, reflValue reflect.Value) error {
332 if envValue, found := os.LookupEnv(envName); found {
333 return setConfigValue(reflValue, envValue)
334 }
335 return nil
336 })
337
338 // defaults for wildcards aren't set by `defaults.MustSet` call above because the structs
339 // for them haven't been created yet
340 for i := range config.Wildcard {
341 defaults.MustSet(&config.Wildcard[i])
342 }
343
344 return
345}