[mirror] Scalable static site server for Git forges (like GitHub Pages)
1package git_pages
2
3import (
4 "bytes"
5 "encoding/json"
6 "fmt"
7 "net/url"
8 "os"
9 "reflect"
10 "slices"
11 "strconv"
12 "strings"
13 "time"
14
15 "github.com/c2h5oh/datasize"
16 "github.com/creasty/defaults"
17 "github.com/pelletier/go-toml/v2"
18)
19
// Duration wraps the standard `time.Duration`, which does not itself implement the standard
// `encoding.{TextMarshaler,TextUnmarshaler}` interfaces, so that a duration can be read from
// and written to text such as "60s" or "1h".
type Duration time.Duration

// String renders the duration exactly as `time.Duration` does (e.g. "1m0s").
func (t Duration) String() string {
	std := time.Duration(t)
	return fmt.Sprint(std)
}

// UnmarshalText parses data with `time.ParseDuration`. On failure the parse error is returned
// and the receiver keeps its previous value.
func (t *Duration) UnmarshalText(data []byte) error {
	parsed, err := time.ParseDuration(string(data))
	if err != nil {
		return err
	}
	*t = Duration(parsed)
	return nil
}

// MarshalText returns the same text as String; the error is always nil.
func (t *Duration) MarshalText() ([]byte, error) {
	text := (*t).String()
	return []byte(text), nil
}
39
// URL embeds the standard `url.URL`, which does not implement the standard
// `encoding.{TextMarshaler,TextUnmarshaler}` interfaces, and adds them so that a URL can be
// read from and written to plain text.
type URL struct {
	url.URL
}

// String formats the embedded URL the way `(*url.URL).String` does.
func (t *URL) String() string {
	inner := &t.URL
	return fmt.Sprint(inner)
}

// UnmarshalText parses data with `url.Parse`. On failure the parse error is returned and the
// receiver is left untouched.
func (t *URL) UnmarshalText(data []byte) error {
	parsed, err := url.Parse(string(data))
	if err != nil {
		return err
	}
	*t = URL{URL: *parsed}
	return nil
}

// MarshalText returns the same text as String; the error is always nil.
func (t *URL) MarshalText() ([]byte, error) {
	text := t.String()
	return []byte(text), nil
}
61
// Config is the root of the application configuration. Each field's `toml` tag names the key
// it is read from in the configuration file; `default` tags are applied by the `defaults`
// package before the file is read (see Configure).
type Config struct {
	// Excluded from TOML (`toml:"-"`); its `env` tag makes it reachable from the environment
	// walk only.
	Insecure bool `toml:"-" env:"insecure"`
	// Feature names; queried through the Feature method.
	Features []string `toml:"features"`
	// Defaults to "text".
	LogFormat string `toml:"log-format" default:"text"`
	// Per-area sections, each stored under the TOML key named in its tag.
	Server        ServerConfig        `toml:"server"`
	Wildcard      []WildcardConfig    `toml:"wildcard"`
	Fallback      FallbackConfig      `toml:"fallback"`
	Storage       StorageConfig       `toml:"storage"`
	Limits        LimitsConfig        `toml:"limits"`
	Audit         AuditConfig         `toml:"audit"`
	Observability ObservabilityConfig `toml:"observability"`
}
74
// ServerConfig holds three endpoint strings whose defaults use a `tcp/<host>:<port>` form.
// NOTE(review): presumably these are the listen addresses of the pages, Caddy and metrics
// listeners — confirm against the code that consumes them.
type ServerConfig struct {
	Pages   string `toml:"pages" default:"tcp/localhost:3000"`
	Caddy   string `toml:"caddy" default:"tcp/localhost:3001"`
	Metrics string `toml:"metrics" default:"tcp/localhost:3002"`
}
80
// WildcardConfig is one element of the `wildcard` list in Config. Besides TOML, a whole list
// can be supplied as a JSON array through the environment (see setConfigValue); the struct
// carries no `json` tags, so JSON keys are matched against the Go field names.
type WildcardConfig struct {
	Domain          string   `toml:"domain"`
	CloneURL        string   `toml:"clone-url"` // URL template, not an exact URL
	IndexRepos      []string `toml:"index-repos" default:"[]"`
	IndexRepoBranch string   `toml:"index-repo-branch" default:"pages"`
	Authorization   string   `toml:"authorization"`
}
88
// FallbackConfig is the `fallback` section of Config.
// NOTE(review): presumably requests that cannot be served locally are proxied to ProxyTo,
// and Insecure relaxes transport checks for that proxy — confirm against the consumer.
type FallbackConfig struct {
	// Has no default, so it stays nil unless set.
	ProxyTo  *URL `toml:"proxy-to"`
	Insecure bool `toml:"insecure"`
}
93
// CacheConfig describes one cache by a size and two durations. It declares no defaults of its
// own; S3Config supplies them per cache through JSON-valued `default` tags.
// NOTE(review): the exact meaning of MaxAge versus MaxStale is not visible here — confirm
// against the cache implementation.
type CacheConfig struct {
	MaxSize  datasize.ByteSize `toml:"max-size"`
	MaxAge   Duration          `toml:"max-age"`
	MaxStale Duration          `toml:"max-stale"`
}
99
// StorageConfig is the `storage` section of Config. Type defaults to "fs", and the FS section
// defaults to a Root of "./data" (given as a JSON object in the `default` tag).
// NOTE(review): presumably Type selects which of the FS/S3 sections is used — confirm.
type StorageConfig struct {
	Type string   `toml:"type" default:"fs"`
	FS   FSConfig `toml:"fs" default:"{\"Root\":\"./data\"}"`
	S3   S3Config `toml:"s3"`
}
105
// FSConfig is the `fs` subsection of StorageConfig. Root has no `default` tag here; the
// default of "./data" comes from the tag on StorageConfig.FS.
type FSConfig struct {
	Root string `toml:"root"`
}
109
// S3Config is the `s3` subsection of StorageConfig: connection settings plus two caches.
// Only the caches have defaults: BlobCache gets MaxSize 256MB, and SiteCache gets MaxAge 60s,
// MaxStale 1h and MaxSize 16MB (both given as JSON objects in the `default` tags).
type S3Config struct {
	Endpoint        string      `toml:"endpoint"`
	Insecure        bool        `toml:"insecure"`
	AccessKeyID     string      `toml:"access-key-id"`
	SecretAccessKey string      `toml:"secret-access-key"`
	Region          string      `toml:"region"`
	Bucket          string      `toml:"bucket"`
	BlobCache       CacheConfig `toml:"blob-cache" default:"{\"MaxSize\":\"256MB\"}"`
	SiteCache       CacheConfig `toml:"site-cache" default:"{\"MaxAge\":\"60s\",\"MaxStale\":\"1h\",\"MaxSize\":\"16MB\"}"`
}
120
// LimitsConfig is the `limits` section of Config: resource ceilings, a timeout, and the
// allow/deny lists applied to uploads. Every field carries a `default` tag.
type LimitsConfig struct {
	// Maximum size of a single published site. Also used to limit the size of archive
	// uploads and other similar overconsumption conditions.
	MaxSiteSize datasize.ByteSize `toml:"max-site-size" default:"128M"`
	// Maximum size of a single site manifest, computed over its binary Protobuf
	// serialization.
	MaxManifestSize datasize.ByteSize `toml:"max-manifest-size" default:"1M"`
	// Maximum size of a file that will still be inlined into the site manifest.
	MaxInlineFileSize datasize.ByteSize `toml:"max-inline-file-size" default:"256B"`
	// Maximum size of a Git object that will be cached in memory during Git operations.
	GitLargeObjectThreshold datasize.ByteSize `toml:"git-large-object-threshold" default:"1M"`
	// Maximum number of symbolic link traversals before the path is considered unreachable.
	MaxSymlinkDepth uint `toml:"max-symlink-depth" default:"16"`
	// Maximum time that an update operation (PUT or POST request) could take before being
	// interrupted.
	UpdateTimeout Duration `toml:"update-timeout" default:"60s"`
	// Soft limit on Go heap size, expressed as a fraction of total available RAM.
	MaxHeapSizeRatio float64 `toml:"max-heap-size-ratio" default:"0.5"`
	// List of domains unconditionally forbidden for uploads.
	ForbiddenDomains []string `toml:"forbidden-domains" default:"[]"`
	// List of allowed repository URL prefixes. Setting this option prohibits uploading archives.
	AllowedRepositoryURLPrefixes []string `toml:"allowed-repository-url-prefixes" default:"[]"`
	// List of allowed custom headers. Header name must be in the MIME canonical form,
	// e.g. `Foo-Bar`. Setting this option permits including this custom header in `_headers`,
	// unless it is fundamentally unsafe.
	AllowedCustomHeaders []string `toml:"allowed-custom-headers" default:"[\"X-Clacks-Overhead\"]"`
}
148
// AuditConfig is the `audit` section of Config. None of its fields declares a default, so an
// unconfigured section is all zero values (NodeID 0, Collect false, no IPs, nil NotifyURL).
type AuditConfig struct {
	// Globally unique machine identifier (0 to 63 inclusive).
	NodeID int `toml:"node-id"`
	// Whether audit reports should be stored whenever an audit event occurs.
	Collect bool `toml:"collect"`
	// If not empty, includes the principal's IP address in audit reports, with the value specifying
	// the source of the IP address. If the value is "X-Forwarded-For", the last item of the
	// corresponding header field (assumed to be comma-separated) is used. If the value is
	// "RemoteAddr", the connecting host's address is used. Any other value is disallowed.
	// Note that the TOML key is the singular `include-ip`.
	IncludeIPs string `toml:"include-ip"`
	// Endpoint to notify with a `GET /<notify-url>?<id>` whenever an audit event occurs.
	NotifyURL *URL `toml:"notify-url"`
}
162
// ObservabilityConfig is the `observability` section of Config.
type ObservabilityConfig struct {
	// Minimum duration for an HTTP request transaction to be unconditionally sampled.
	// Defaults to 500ms.
	SlowResponseThreshold Duration `toml:"slow-response-threshold" default:"500ms"`
}
167
168func (config *Config) TOML() string {
169 result, err := toml.Marshal(config)
170 if err != nil {
171 panic(err)
172 }
173 return string(result)
174}
175
176func (config *Config) Feature(name string) bool {
177 return slices.Contains(config.Features, name)
178}
179
// walkConfigState is the cursor threaded through walkConfigScope while it descends a
// configuration struct.
type walkConfigState struct {
	config    reflect.Value // root struct value; leaves are resolved from it through index
	scopeType reflect.Type  // type of the struct whose fields are being enumerated
	index     []int         // field index path from the root to the current scope
	segments  []string      // upper-cased name segments accumulated so far
}

// configFieldName returns the external name of a struct field for the environment walk, and
// false if the field must be skipped. The `env` tag takes precedence over the `toml` tag.
// Anything after the first comma (tag options such as `omitempty`) is ignored. A name of "-"
// skips the field, and an empty name falls back to the Go field name.
func configFieldName(field reflect.StructField) (string, bool) {
	tagValue, ok := field.Tag.Lookup("env")
	if !ok {
		tagValue, ok = field.Tag.Lookup("toml")
	}
	if !ok {
		return "", false // implicit skip: the field carries neither tag
	}
	name, _, _ := strings.Cut(tagValue, ",")
	switch name {
	case "-":
		return "", false // explicit skip
	case "":
		name = field.Name
	}
	return name, true
}

// walkConfigScope enumerates the visible fields of scopeState.scopeType, recursing into
// fields of struct kind and calling onKey for every other tagged field.
//
// onKey receives the accumulated segments joined with "_" (each segment is the field's name
// upper-cased with "-" replaced by "_") together with the settable field value resolved from
// the root value. Pointer, slice and other non-struct kinds are reported as leaves.
//
// The walk stops at, and returns, the first error produced by onKey.
func walkConfigScope(scopeState walkConfigState, onKey func(string, reflect.Value) error) (err error) {
	for _, field := range reflect.VisibleFields(scopeState.scopeType) {
		name, ok := configFieldName(field)
		if !ok {
			continue
		}
		segment := strings.ReplaceAll(strings.ToUpper(name), "-", "_")
		// Clip before appending so the child always gets its own backing array and sibling
		// fields can never overwrite each other's paths.
		fieldState := walkConfigState{
			config:    scopeState.config,
			scopeType: field.Type,
			index:     append(slices.Clip(scopeState.index), field.Index...),
			segments:  append(slices.Clip(scopeState.segments), segment),
		}
		if field.Type.Kind() == reflect.Struct {
			err = walkConfigScope(fieldState, onKey)
		} else {
			err = onKey(
				strings.Join(fieldState.segments, "_"),
				scopeState.config.FieldByIndex(fieldState.index),
			)
		}
		if err != nil {
			return err
		}
	}
	return nil
}
217
218func walkConfig(config *Config, onKey func(string, reflect.Value) error) error {
219 state := walkConfigState{
220 config: reflect.ValueOf(config).Elem(),
221 scopeType: reflect.TypeOf(config).Elem(),
222 index: []int{},
223 segments: []string{"PAGES"},
224 }
225 return walkConfigScope(state, onKey)
226}
227
228func setConfigValue(reflValue reflect.Value, repr string) (err error) {
229 valueAny := reflValue.Interface()
230 switch valueCast := valueAny.(type) {
231 case string:
232 reflValue.SetString(repr)
233 case []string:
234 reflValue.Set(reflect.ValueOf(strings.Split(repr, ",")))
235 case bool:
236 if valueCast, err = strconv.ParseBool(repr); err == nil {
237 reflValue.SetBool(valueCast)
238 }
239 case int:
240 var parsed int64
241 if parsed, err = strconv.ParseInt(repr, 10, strconv.IntSize); err == nil {
242 reflValue.SetInt(parsed)
243 }
244 case uint:
245 var parsed uint64
246 if parsed, err = strconv.ParseUint(repr, 10, strconv.IntSize); err == nil {
247 reflValue.SetUint(parsed)
248 }
249 case float64:
250 if valueCast, err = strconv.ParseFloat(repr, 64); err == nil {
251 reflValue.SetFloat(valueCast)
252 }
253 case datasize.ByteSize:
254 if valueCast, err = datasize.ParseString(repr); err == nil {
255 reflValue.Set(reflect.ValueOf(valueCast))
256 }
257 case Duration:
258 var parsed time.Duration
259 if parsed, err = time.ParseDuration(repr); err == nil {
260 reflValue.Set(reflect.ValueOf(Duration(parsed)))
261 }
262 case *URL:
263 if repr == "" {
264 reflValue.Set(reflect.ValueOf(nil))
265 } else {
266 var parsed *url.URL
267 if parsed, err = url.Parse(repr); err == nil {
268 reflValue.Set(reflect.ValueOf(&URL{*parsed}))
269 }
270 }
271 case []WildcardConfig:
272 var parsed []*WildcardConfig
273 decoder := json.NewDecoder(bytes.NewReader([]byte(repr)))
274 decoder.DisallowUnknownFields()
275 if err = decoder.Decode(&parsed); err == nil {
276 var assigned []WildcardConfig
277 for _, wildcard := range parsed {
278 defaults.MustSet(wildcard)
279 assigned = append(assigned, *wildcard)
280 }
281 reflValue.Set(reflect.ValueOf(assigned))
282 }
283 default:
284 panic("unhandled config value type")
285 }
286 return err
287}
288
289func PrintConfigEnvVars() {
290 config := Config{}
291 defaults.MustSet(&config)
292
293 walkConfig(&config, func(envName string, reflValue reflect.Value) (err error) {
294 value := reflValue.Interface()
295 reprBefore := fmt.Sprint(value)
296 fmt.Printf("%s %T = %q\n", envName, value, reprBefore)
297 // make sure that the value, at least, roundtrips
298 setConfigValue(reflValue, reprBefore)
299 reprAfter := fmt.Sprint(value)
300 if reprBefore != reprAfter {
301 panic("failed to roundtrip config value")
302 }
303 return
304 })
305}
306
307func Configure(tomlPath string) (config *Config, err error) {
308 // start with an all-default configuration
309 config = new(Config)
310 defaults.MustSet(config)
311
312 // inject values from `config.toml`
313 if tomlPath != "" {
314 var file *os.File
315 file, err = os.Open(tomlPath)
316 if err != nil {
317 return
318 }
319 defer file.Close()
320
321 decoder := toml.NewDecoder(file)
322 decoder.DisallowUnknownFields()
323 decoder.EnableUnmarshalerInterface()
324 if err = decoder.Decode(&config); err != nil {
325 return
326 }
327 }
328
329 // inject values from the environment, overriding everything else
330 err = walkConfig(config, func(envName string, reflValue reflect.Value) error {
331 if envValue, found := os.LookupEnv(envName); found {
332 return setConfigValue(reflValue, envValue)
333 }
334 return nil
335 })
336
337 // defaults for wildcards aren't set by `defaults.MustSet` call above because the structs
338 // for them haven't been created yet
339 for i := range config.Wildcard {
340 defaults.MustSet(&config.Wildcard[i])
341 }
342
343 return
344}