A container registry that uses the AT Protocol for manifest storage and S3 for blob storage. atcr.io
docker container atproto go

begin S3 garbage collection implementation, more env var cleanup

evan.jarrett.net 9e600649 64cdb669

verified
-11
.env.appview.example
··· 21 # Production: Set to your public URL (e.g., https://atcr.io) 22 # ATCR_BASE_URL=http://127.0.0.1:5000 23 24 - # Service name (used for JWT service/issuer fields) 25 - # Default: Derived from base URL hostname, or "atcr.io" 26 - # ATCR_SERVICE_NAME=atcr.io 27 - 28 # ============================================================================== 29 # Storage Configuration 30 # ============================================================================== ··· 48 # Path to JWT signing certificate (auto-generated if missing) 49 # Default: /var/lib/atcr/auth/private-key.crt 50 # ATCR_AUTH_CERT_PATH=/var/lib/atcr/auth/private-key.crt 51 - 52 - # JWT token expiration in seconds (default: 300 = 5 minutes) 53 - # ATCR_TOKEN_EXPIRATION=300 54 55 # Path to OAuth client P-256 signing key (auto-generated on first run) 56 # Used for confidential OAuth client authentication (production only) ··· 130 # ATProto relay endpoint for backfill sync API 131 # Default: https://relay1.us-east.bsky.network 132 # ATCR_RELAY_ENDPOINT=https://relay1.us-east.bsky.network 133 - 134 - # Backfill interval (default: 1h) 135 - # Examples: 30m, 1h, 2h, 24h 136 - # ATCR_BACKFILL_INTERVAL=1h
··· 21 # Production: Set to your public URL (e.g., https://atcr.io) 22 # ATCR_BASE_URL=http://127.0.0.1:5000 23 24 # ============================================================================== 25 # Storage Configuration 26 # ============================================================================== ··· 44 # Path to JWT signing certificate (auto-generated if missing) 45 # Default: /var/lib/atcr/auth/private-key.crt 46 # ATCR_AUTH_CERT_PATH=/var/lib/atcr/auth/private-key.crt 47 48 # Path to OAuth client P-256 signing key (auto-generated on first run) 49 # Used for confidential OAuth client authentication (production only) ··· 123 # ATProto relay endpoint for backfill sync API 124 # Default: https://relay1.us-east.bsky.network 125 # ATCR_RELAY_ENDPOINT=https://relay1.us-east.bsky.network
-13
.env.example
··· 45 # Production: Set to your public URL (e.g., https://atcr.io) 46 # ATCR_BASE_URL=https://atcr.io 47 48 - # Service name for JWT issuer/service fields 49 - # Default: Derived from ATCR_BASE_URL hostname, or "atcr.io" 50 - # ATCR_SERVICE_NAME=atcr.io 51 - 52 # ============================================================================== 53 # APPVIEW - STORAGE CONFIGURATION (REQUIRED) 54 # ============================================================================== ··· 72 # Default: /var/lib/atcr/auth/private-key.crt 73 # ATCR_AUTH_CERT_PATH=/var/lib/atcr/auth/private-key.crt 74 75 - # JWT token expiration in seconds 76 - # Default: 300 (5 minutes) 77 - # ATCR_TOKEN_EXPIRATION=300 78 - 79 # Path to OAuth client P-256 signing key (auto-generated for production) 80 # Used for confidential OAuth client authentication 81 # Localhost deployments always use public OAuth clients (no key needed) ··· 109 # ATProto relay endpoint for backfill sync API 110 # Default: https://relay1.us-east.bsky.network 111 # ATCR_RELAY_ENDPOINT=https://relay1.us-east.bsky.network 112 - 113 - # Backfill sync interval 114 - # Default: 1h 115 - # Examples: 30m, 1h, 2h, 24h 116 - # ATCR_BACKFILL_INTERVAL=1h 117 118 # ============================================================================== 119 # APPVIEW - HEALTH CHECKS
··· 45 # Production: Set to your public URL (e.g., https://atcr.io) 46 # ATCR_BASE_URL=https://atcr.io 47 48 # ============================================================================== 49 # APPVIEW - STORAGE CONFIGURATION (REQUIRED) 50 # ============================================================================== ··· 68 # Default: /var/lib/atcr/auth/private-key.crt 69 # ATCR_AUTH_CERT_PATH=/var/lib/atcr/auth/private-key.crt 70 71 # Path to OAuth client P-256 signing key (auto-generated for production) 72 # Used for confidential OAuth client authentication 73 # Localhost deployments always use public OAuth clients (no key needed) ··· 101 # ATProto relay endpoint for backfill sync API 102 # Default: https://relay1.us-east.bsky.network 103 # ATCR_RELAY_ENDPOINT=https://relay1.us-east.bsky.network 104 105 # ============================================================================== 106 # APPVIEW - HEALTH CHECKS
+12
.env.hold.example
··· 151 # Basic auth credentials (optional) 152 # ATCR_LOG_SHIPPER_USERNAME= 153 # ATCR_LOG_SHIPPER_PASSWORD=
··· 151 # Basic auth credentials (optional) 152 # ATCR_LOG_SHIPPER_USERNAME= 153 # ATCR_LOG_SHIPPER_PASSWORD= 154 + 155 + # ============================================================================== 156 + # Garbage Collection 157 + # ============================================================================== 158 + 159 + # Enable garbage collection for orphaned blobs (default: true) 160 + # GC runs on startup and then nightly (every 24 hours) 161 + GC_ENABLED=true 162 + 163 + # Dry-run mode: log what would be deleted without actually deleting (default: true) 164 + # Set to false after validating the GC logs show correct behavior 165 + GC_DRY_RUN=true
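For context while validating: the dry-run pass added in pkg/hold/gc logs every candidate deletion through slog with component=gc, so with a text log handler the lines to look for read roughly like the following (the digest, size, rkey, and count values here are placeholders, not real output):

level=INFO msg="DRY-RUN: Would delete layer record" component=gc rkey=<rkey>
level=INFO msg="DRY-RUN: Would delete blob" component=gc digest=sha256:<hash> size=<bytes>
level=INFO msg="GC run complete" component=gc orphanedRecords=<n> orphanedBlobs=<n> blobsDeleted=0 bytesReclaimed=0 dryRun=true

In dry-run mode blobsDeleted and bytesReclaimed stay at 0; once the listed orphan candidates look correct, set GC_DRY_RUN=false and restart the hold.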
+1 -2
CLAUDE.md
··· 230 - **Issued by:** AppView after OAuth login 231 - **Stored in:** Docker credential helper (`~/.atcr/credential-helper-token.json`) 232 - **Used for:** Docker client → AppView authentication 233 - - **Lifetime:** 15 minutes (configurable via `ATCR_TOKEN_EXPIRATION`) 234 - **Format:** JWT with DID claim 235 236 **3. Service Tokens** ··· 666 667 **Authentication:** 668 - `ATCR_AUTH_KEY_PATH` - JWT signing key path (default: `/var/lib/atcr/auth/private-key.pem`) 669 - - `ATCR_TOKEN_EXPIRATION` - JWT expiration in seconds (default: 300) 670 671 **UI:** 672 - `ATCR_UI_DATABASE_PATH` - SQLite database path (default: `/var/lib/atcr/ui.db`)
··· 230 - **Issued by:** AppView after OAuth login 231 - **Stored in:** Docker credential helper (`~/.atcr/credential-helper-token.json`) 232 - **Used for:** Docker client → AppView authentication 233 + - **Lifetime:** 5 minutes 234 - **Format:** JWT with DID claim 235 236 **3. Service Tokens** ··· 666 667 **Authentication:** 668 - `ATCR_AUTH_KEY_PATH` - JWT signing key path (default: `/var/lib/atcr/auth/private-key.pem`) 669 670 **UI:** 671 - `ATCR_UI_DATABASE_PATH` - SQLite database path (default: `/var/lib/atcr/ui.db`)
+2 -2
cmd/appview/serve.go
··· 565 } 566 }() 567 568 - // Start periodic backfill scheduler 569 - interval := jetstreamCfg.BackfillInterval 570 571 go func() { 572 ticker := time.NewTicker(interval)
··· 565 } 566 }() 567 568 + // Start periodic backfill scheduler (hardcoded 1h interval) 569 + interval := 1 * time.Hour 570 571 go func() { 572 ticker := time.NewTicker(interval)
+20
cmd/hold/main.go
··· 12 13 "atcr.io/pkg/hold" 14 "atcr.io/pkg/hold/admin" 15 "atcr.io/pkg/hold/oci" 16 "atcr.io/pkg/hold/pds" 17 "atcr.io/pkg/hold/quota" ··· 122 123 // Create blob store adapter and XRPC handlers 124 var ociHandler *oci.XRPCHandler 125 if holdPDS != nil { 126 // Create storage driver from config 127 ctx := context.Background() ··· 142 143 // Create OCI XRPC handler (multipart upload endpoints) 144 ociHandler = oci.NewXRPCHandler(holdPDS, *s3Service, driver, cfg.Server.DisablePresignedURLs, cfg.Registration.EnableBlueskyPosts, nil, quotaMgr) 145 } 146 147 // Setup HTTP routes with chi router ··· 238 } 239 } 240 241 // Wait for signal or server error 242 select { 243 case err := <-serverErr: ··· 255 } else { 256 slog.Info("Status post set to offline") 257 } 258 } 259 260 // Close broadcaster database connection
··· 12 13 "atcr.io/pkg/hold" 14 "atcr.io/pkg/hold/admin" 15 + "atcr.io/pkg/hold/gc" 16 "atcr.io/pkg/hold/oci" 17 "atcr.io/pkg/hold/pds" 18 "atcr.io/pkg/hold/quota" ··· 123 124 // Create blob store adapter and XRPC handlers 125 var ociHandler *oci.XRPCHandler 126 + var garbageCollector *gc.GarbageCollector 127 if holdPDS != nil { 128 // Create storage driver from config 129 ctx := context.Background() ··· 144 145 // Create OCI XRPC handler (multipart upload endpoints) 146 ociHandler = oci.NewXRPCHandler(holdPDS, *s3Service, driver, cfg.Server.DisablePresignedURLs, cfg.Registration.EnableBlueskyPosts, nil, quotaMgr) 147 + 148 + // Initialize garbage collector 149 + gcConfig := gc.LoadConfigFromEnv() 150 + garbageCollector = gc.NewGarbageCollector(holdPDS, driver, gcConfig) 151 + slog.Info("Garbage collector initialized", 152 + "enabled", gcConfig.Enabled, 153 + "dryRun", gcConfig.DryRun) 154 } 155 156 // Setup HTTP routes with chi router ··· 247 } 248 } 249 250 + // Start garbage collector (runs on startup + nightly) 251 + if garbageCollector != nil { 252 + garbageCollector.Start(context.Background()) 253 + } 254 + 255 // Wait for signal or server error 256 select { 257 case err := <-serverErr: ··· 269 } else { 270 slog.Info("Status post set to offline") 271 } 272 + } 273 + 274 + // Stop garbage collector 275 + if garbageCollector != nil { 276 + garbageCollector.Stop() 277 + slog.Info("Garbage collector stopped") 278 } 279 280 // Close broadcaster database connection
-16
deploy/.env.prod.template
··· 142 # Uncomment to override if you want to use a different hold service as the default 143 # ATCR_DEFAULT_HOLD_DID=did:web:some-other-hold.example.com 144 145 - # JWT token expiration in seconds 146 - # Default: 300 (5 minutes) 147 - ATCR_TOKEN_EXPIRATION=300 148 - 149 # OAuth client display name (shown in authorization screens) 150 # Default: AT Container Registry 151 # ATCR_CLIENT_NAME=AT Container Registry ··· 178 # Default: https://relay1.us-east.bsky.network 179 ATCR_RELAY_ENDPOINT=https://relay1.us-east.bsky.network 180 181 - # Backfill interval 182 - # Examples: 30m, 1h, 2h, 24h 183 - # Default: 1h 184 - ATCR_BACKFILL_INTERVAL=1h 185 - 186 # ============================================================================== 187 # Optional: Filesystem Storage (alternative to S3) 188 # ============================================================================== ··· 194 195 # STORAGE_DRIVER=filesystem 196 # STORAGE_ROOT_DIR=/var/lib/atcr/hold 197 - 198 - # ============================================================================== 199 - # Advanced Configuration 200 - # ============================================================================== 201 - 202 - # Override service name (defaults to APPVIEW_DOMAIN) 203 - # ATCR_SERVICE_NAME=atcr.io 204 205 # ============================================================================== 206 # CHECKLIST
··· 142 # Uncomment to override if you want to use a different hold service as the default 143 # ATCR_DEFAULT_HOLD_DID=did:web:some-other-hold.example.com 144 145 # OAuth client display name (shown in authorization screens) 146 # Default: AT Container Registry 147 # ATCR_CLIENT_NAME=AT Container Registry ··· 174 # Default: https://relay1.us-east.bsky.network 175 ATCR_RELAY_ENDPOINT=https://relay1.us-east.bsky.network 176 177 # ============================================================================== 178 # Optional: Filesystem Storage (alternative to S3) 179 # ============================================================================== ··· 185 186 # STORAGE_DRIVER=filesystem 187 # STORAGE_ROOT_DIR=/var/lib/atcr/hold 188 189 # ============================================================================== 190 # CHECKLIST
-3
deploy/docker-compose.prod.yml
··· 48 # Server configuration 49 ATCR_HTTP_ADDR: :5000 50 ATCR_BASE_URL: https://${APPVIEW_DOMAIN:-atcr.io} 51 - ATCR_SERVICE_NAME: ${APPVIEW_DOMAIN:-atcr.io} 52 53 # Storage configuration (derived from HOLD_DOMAIN) 54 ATCR_DEFAULT_HOLD_DID: ${ATCR_DEFAULT_HOLD_DID:-did:web:${HOLD_DOMAIN:-hold01.atcr.io}} ··· 56 # Authentication 57 ATCR_AUTH_KEY_PATH: /var/lib/atcr/auth/private-key.pem 58 ATCR_AUTH_CERT_PATH: /var/lib/atcr/auth/private-key.crt 59 - ATCR_TOKEN_EXPIRATION: ${ATCR_TOKEN_EXPIRATION:-300} 60 61 # UI configuration 62 ATCR_UI_DATABASE_PATH: /var/lib/atcr/ui.db ··· 69 JETSTREAM_URL: ${JETSTREAM_URL:-wss://jetstream2.us-west.bsky.network/subscribe} 70 ATCR_BACKFILL_ENABLED: ${ATCR_BACKFILL_ENABLED:-true} 71 ATCR_RELAY_ENDPOINT: ${ATCR_RELAY_ENDPOINT:-https://relay1.us-east.bsky.network} 72 - ATCR_BACKFILL_INTERVAL: ${ATCR_BACKFILL_INTERVAL:-1h} 73 volumes: 74 # Persistent data: auth keys, UI database, OAuth tokens, Jetstream cache 75 - atcr-appview-data:/var/lib/atcr
··· 48 # Server configuration 49 ATCR_HTTP_ADDR: :5000 50 ATCR_BASE_URL: https://${APPVIEW_DOMAIN:-atcr.io} 51 52 # Storage configuration (derived from HOLD_DOMAIN) 53 ATCR_DEFAULT_HOLD_DID: ${ATCR_DEFAULT_HOLD_DID:-did:web:${HOLD_DOMAIN:-hold01.atcr.io}} ··· 55 # Authentication 56 ATCR_AUTH_KEY_PATH: /var/lib/atcr/auth/private-key.pem 57 ATCR_AUTH_CERT_PATH: /var/lib/atcr/auth/private-key.crt 58 59 # UI configuration 60 ATCR_UI_DATABASE_PATH: /var/lib/atcr/ui.db ··· 67 JETSTREAM_URL: ${JETSTREAM_URL:-wss://jetstream2.us-west.bsky.network/subscribe} 68 ATCR_BACKFILL_ENABLED: ${ATCR_BACKFILL_ENABLED:-true} 69 ATCR_RELAY_ENDPOINT: ${ATCR_RELAY_ENDPOINT:-https://relay1.us-east.bsky.network} 70 volumes: 71 # Persistent data: auth keys, UI database, OAuth tokens, Jetstream cache 72 - atcr-appview-data:/var/lib/atcr
-15
docs/appview.md
··· 110 - **Production:** Set to your public URL (e.g., `https://atcr.example.com`) 111 - **Example:** `https://atcr.io`, `http://127.0.0.1:5000` 112 113 - #### `ATCR_SERVICE_NAME` 114 - - **Default:** Derived from `ATCR_BASE_URL` hostname, or `atcr.io` 115 - - **Description:** Service name used for JWT `service` and `issuer` fields. Controls token scope. 116 - - **Example:** `atcr.io`, `registry.example.com` 117 - 118 ### Storage Configuration 119 120 #### `ATCR_DEFAULT_HOLD_DID` ⚠️ REQUIRED ··· 137 - **Default:** `/var/lib/atcr/auth/private-key.crt` 138 - **Description:** Path to JWT signing certificate. Auto-generated if missing. 139 - **Note:** Paired with `ATCR_AUTH_KEY_PATH` 140 - 141 - #### `ATCR_TOKEN_EXPIRATION` 142 - - **Default:** `300` (5 minutes) 143 - - **Description:** JWT token expiration in seconds. Registry JWTs are short-lived for security. 144 - - **Recommendation:** Keep between 300-900 seconds (5-15 minutes) 145 146 ### Web UI Configuration 147 ··· 199 - **Default:** `https://relay1.us-east.bsky.network` 200 - **Description:** ATProto relay endpoint for backfill sync API 201 - **Note:** Used when `ATCR_BACKFILL_ENABLED=true` 202 - 203 - #### `ATCR_BACKFILL_INTERVAL` 204 - - **Default:** `1h` 205 - - **Description:** How often to run backfill sync 206 - - **Format:** Duration string (e.g., `30m`, `1h`, `2h`, `24h`) 207 208 ### Legacy Configuration 209
··· 110 - **Production:** Set to your public URL (e.g., `https://atcr.example.com`) 111 - **Example:** `https://atcr.io`, `http://127.0.0.1:5000` 112 113 ### Storage Configuration 114 115 #### `ATCR_DEFAULT_HOLD_DID` ⚠️ REQUIRED ··· 132 - **Default:** `/var/lib/atcr/auth/private-key.crt` 133 - **Description:** Path to JWT signing certificate. Auto-generated if missing. 134 - **Note:** Paired with `ATCR_AUTH_KEY_PATH` 135 136 ### Web UI Configuration 137 ··· 189 - **Default:** `https://relay1.us-east.bsky.network` 190 - **Description:** ATProto relay endpoint for backfill sync API 191 - **Note:** Used when `ATCR_BACKFILL_ENABLED=true` 192 193 ### Legacy Configuration 194
+6 -20
pkg/appview/config.go
··· 102 // BackfillEnabled controls whether backfill is enabled (from env: ATCR_BACKFILL_ENABLED, default: true) 103 BackfillEnabled bool `yaml:"backfill_enabled"` 104 105 - // BackfillInterval is the backfill interval (from env: ATCR_BACKFILL_INTERVAL, default: 1h) 106 - BackfillInterval time.Duration `yaml:"backfill_interval"` 107 - 108 // RelayEndpoint is the relay endpoint for sync API (from env: ATCR_RELAY_ENDPOINT, default: https://relay1.us-east.bsky.network) 109 RelayEndpoint string `yaml:"relay_endpoint"` 110 } ··· 117 // CertPath is the JWT certificate path (from env: ATCR_AUTH_CERT_PATH, default: "/var/lib/atcr/auth/private-key.crt") 118 CertPath string `yaml:"cert_path"` 119 120 - // TokenExpiration is the JWT expiration duration (from env: ATCR_TOKEN_EXPIRATION, default: 300s) 121 TokenExpiration time.Duration `yaml:"token_expiration"` 122 123 // ServiceName is the service name used for JWT issuer and service fields 124 - // Derived from ATCR_SERVICE_NAME env var or extracted from base URL (e.g., "atcr.io") 125 ServiceName string `yaml:"service_name"` 126 } 127 ··· 176 // Jetstream configuration 177 cfg.Jetstream.URL = getEnvOrDefault("JETSTREAM_URL", "wss://jetstream2.us-west.bsky.network/subscribe") 178 cfg.Jetstream.BackfillEnabled = os.Getenv("ATCR_BACKFILL_ENABLED") != "false" 179 - cfg.Jetstream.BackfillInterval = getDurationOrDefault("ATCR_BACKFILL_INTERVAL", 1*time.Hour) 180 cfg.Jetstream.RelayEndpoint = getEnvOrDefault("ATCR_RELAY_ENDPOINT", "https://relay1.us-east.bsky.network") 181 182 // Auth configuration 183 cfg.Auth.KeyPath = getEnvOrDefault("ATCR_AUTH_KEY_PATH", "/var/lib/atcr/auth/private-key.pem") 184 cfg.Auth.CertPath = getEnvOrDefault("ATCR_AUTH_CERT_PATH", "/var/lib/atcr/auth/private-key.crt") 185 186 - // Parse token expiration (default: 300 seconds = 5 minutes) 187 - expirationStr := getEnvOrDefault("ATCR_TOKEN_EXPIRATION", "300") 188 - expirationSecs, err := strconv.Atoi(expirationStr) 189 - if err != nil { 190 - return nil, fmt.Errorf("invalid ATCR_TOKEN_EXPIRATION: %w", err) 191 - } 192 - cfg.Auth.TokenExpiration = time.Duration(expirationSecs) * time.Second 193 194 // Derive service name from base URL or env var (used for JWT issuer and service) 195 cfg.Auth.ServiceName = getServiceName(cfg.Server.BaseURL) ··· 336 } 337 } 338 339 - // getServiceName extracts service name from base URL or uses env var 340 func getServiceName(baseURL string) string { 341 - // Check env var first 342 - if serviceName := os.Getenv("ATCR_SERVICE_NAME"); serviceName != "" { 343 - return serviceName 344 - } 345 - 346 - // Try to extract from base URL 347 parsed, err := url.Parse(baseURL) 348 if err == nil && parsed.Hostname() != "" { 349 hostname := parsed.Hostname()
··· 102 // BackfillEnabled controls whether backfill is enabled (from env: ATCR_BACKFILL_ENABLED, default: true) 103 BackfillEnabled bool `yaml:"backfill_enabled"` 104 105 // RelayEndpoint is the relay endpoint for sync API (from env: ATCR_RELAY_ENDPOINT, default: https://relay1.us-east.bsky.network) 106 RelayEndpoint string `yaml:"relay_endpoint"` 107 } ··· 114 // CertPath is the JWT certificate path (from env: ATCR_AUTH_CERT_PATH, default: "/var/lib/atcr/auth/private-key.crt") 115 CertPath string `yaml:"cert_path"` 116 117 + // TokenExpiration is the JWT expiration duration (5 minutes) 118 TokenExpiration time.Duration `yaml:"token_expiration"` 119 120 // ServiceName is the service name used for JWT issuer and service fields 121 + // Derived from base URL hostname (e.g., "atcr.io") 122 ServiceName string `yaml:"service_name"` 123 } 124 ··· 173 // Jetstream configuration 174 cfg.Jetstream.URL = getEnvOrDefault("JETSTREAM_URL", "wss://jetstream2.us-west.bsky.network/subscribe") 175 cfg.Jetstream.BackfillEnabled = os.Getenv("ATCR_BACKFILL_ENABLED") != "false" 176 cfg.Jetstream.RelayEndpoint = getEnvOrDefault("ATCR_RELAY_ENDPOINT", "https://relay1.us-east.bsky.network") 177 178 // Auth configuration 179 cfg.Auth.KeyPath = getEnvOrDefault("ATCR_AUTH_KEY_PATH", "/var/lib/atcr/auth/private-key.pem") 180 cfg.Auth.CertPath = getEnvOrDefault("ATCR_AUTH_CERT_PATH", "/var/lib/atcr/auth/private-key.crt") 181 182 + // Token expiration: 5 minutes (not configurable) 183 + cfg.Auth.TokenExpiration = 5 * time.Minute 184 185 // Derive service name from base URL or env var (used for JWT issuer and service) 186 cfg.Auth.ServiceName = getServiceName(cfg.Server.BaseURL) ··· 327 } 328 } 329 330 + // getServiceName extracts service name from base URL hostname 331 func getServiceName(baseURL string) string { 332 + // Extract from base URL 333 parsed, err := url.Parse(baseURL) 334 if err == nil && parsed.Hostname() != "" { 335 hostname := parsed.Hostname()
+3 -24
pkg/appview/config_test.go
··· 8 9 func Test_getServiceName(t *testing.T) { 10 tests := []struct { 11 - name string 12 - baseURL string 13 - envService string 14 - setEnv bool 15 - want string 16 }{ 17 { 18 - name: "env var set", 19 - baseURL: "http://127.0.0.1:5000", 20 - envService: "custom.registry.io", 21 - setEnv: true, 22 - want: "custom.registry.io", 23 - }, 24 - { 25 name: "localhost - use default", 26 baseURL: "http://localhost:5000", 27 - setEnv: false, 28 want: "atcr.io", 29 }, 30 { 31 name: "127.0.0.1 - use default", 32 baseURL: "http://127.0.0.1:5000", 33 - setEnv: false, 34 want: "atcr.io", 35 }, 36 { 37 name: "custom domain", 38 baseURL: "https://registry.example.com", 39 - setEnv: false, 40 want: "registry.example.com", 41 }, 42 { 43 name: "domain with port", 44 baseURL: "https://registry.example.com:443", 45 - setEnv: false, 46 want: "registry.example.com", 47 }, 48 { 49 name: "invalid URL - use default", 50 baseURL: "://invalid", 51 - setEnv: false, 52 want: "atcr.io", 53 }, 54 } 55 56 for _, tt := range tests { 57 t.Run(tt.name, func(t *testing.T) { 58 - if tt.setEnv { 59 - t.Setenv("ATCR_SERVICE_NAME", tt.envService) 60 - } else { 61 - os.Unsetenv("ATCR_SERVICE_NAME") 62 - } 63 - 64 got := getServiceName(tt.baseURL) 65 if got != tt.want { 66 t.Errorf("getServiceName() = %v, want %v", got, tt.want) ··· 214 215 // Clear other env vars to use defaults 216 os.Unsetenv("ATCR_BASE_URL") 217 - os.Unsetenv("ATCR_SERVICE_NAME") 218 219 got, err := LoadConfigFromEnv() 220 if (err != nil) != tt.wantError {
··· 8 9 func Test_getServiceName(t *testing.T) { 10 tests := []struct { 11 + name string 12 + baseURL string 13 + want string 14 }{ 15 { 16 name: "localhost - use default", 17 baseURL: "http://localhost:5000", 18 want: "atcr.io", 19 }, 20 { 21 name: "127.0.0.1 - use default", 22 baseURL: "http://127.0.0.1:5000", 23 want: "atcr.io", 24 }, 25 { 26 name: "custom domain", 27 baseURL: "https://registry.example.com", 28 want: "registry.example.com", 29 }, 30 { 31 name: "domain with port", 32 baseURL: "https://registry.example.com:443", 33 want: "registry.example.com", 34 }, 35 { 36 name: "invalid URL - use default", 37 baseURL: "://invalid", 38 want: "atcr.io", 39 }, 40 } 41 42 for _, tt := range tests { 43 t.Run(tt.name, func(t *testing.T) { 44 got := getServiceName(tt.baseURL) 45 if got != tt.want { 46 t.Errorf("getServiceName() = %v, want %v", got, tt.want) ··· 194 195 // Clear other env vars to use defaults 196 os.Unsetenv("ATCR_BASE_URL") 197 198 got, err := LoadConfigFromEnv() 199 if (err != nil) != tt.wantError {
+37
pkg/hold/gc/config.go
···
··· 1 + // Package gc implements garbage collection for the hold service. 2 + // It periodically cleans up orphaned blobs from S3 storage based on 3 + // layer records in the hold's embedded PDS. 4 + package gc 5 + 6 + import ( 7 + "os" 8 + "time" 9 + ) 10 + 11 + // Hardcoded defaults - keep configuration simple 12 + const ( 13 + // gcInterval is how often GC runs (nightly) 14 + gcInterval = 24 * time.Hour 15 + 16 + // gcGracePeriod is how old a layer record must be before it's considered for GC. 17 + // Records created in the last 7 days are skipped (GDPR/CCPA compliant). 18 + gcGracePeriod = 7 * 24 * time.Hour 19 + ) 20 + 21 + // Config holds GC configuration, loaded from environment variables 22 + type Config struct { 23 + // Enabled controls whether GC is active (GC_ENABLED, default: true) 24 + Enabled bool 25 + 26 + // DryRun logs what would be deleted without actually deleting (GC_DRY_RUN, default: true) 27 + // Remove after initial validation 28 + DryRun bool 29 + } 30 + 31 + // LoadConfigFromEnv loads GC configuration from environment variables 32 + func LoadConfigFromEnv() Config { 33 + return Config{ 34 + Enabled: os.Getenv("GC_ENABLED") != "false", // Default true 35 + DryRun: os.Getenv("GC_DRY_RUN") != "false", // Default true 36 + } 37 + }
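One subtlety of LoadConfigFromEnv as written: only the literal string "false" disables a flag, so values like "0" or "no" still leave GC (and dry-run) enabled. A minimal standalone sketch of that behavior, using the import path added in cmd/hold/main.go:

package main

import (
    "fmt"
    "os"

    "atcr.io/pkg/hold/gc"
)

func main() {
    os.Setenv("GC_ENABLED", "0")     // not the literal "false", so GC stays enabled
    os.Setenv("GC_DRY_RUN", "false") // dry-run explicitly off
    cfg := gc.LoadConfigFromEnv()
    fmt.Println(cfg.Enabled, cfg.DryRun) // true false
}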
+446
pkg/hold/gc/gc.go
···
··· 1 + package gc 2 + 3 + import ( 4 + "bytes" 5 + "context" 6 + "encoding/json" 7 + "fmt" 8 + "io" 9 + "log/slog" 10 + "net/http" 11 + "regexp" 12 + "strings" 13 + "sync" 14 + "time" 15 + 16 + "atcr.io/pkg/atproto" 17 + "atcr.io/pkg/hold/pds" 18 + "github.com/bluesky-social/indigo/atproto/syntax" 19 + storagedriver "github.com/distribution/distribution/v3/registry/storage/driver" 20 + ) 21 + 22 + // GarbageCollector handles cleanup of orphaned blobs from storage 23 + type GarbageCollector struct { 24 + pds *pds.HoldPDS 25 + driver storagedriver.StorageDriver 26 + cfg Config 27 + logger *slog.Logger 28 + 29 + // stopCh signals the background goroutine to stop 30 + stopCh chan struct{} 31 + // wg tracks the background goroutine 32 + wg sync.WaitGroup 33 + } 34 + 35 + // GCResult contains statistics from a GC run 36 + type GCResult struct { 37 + BlobsDeleted int64 `json:"blobs_deleted"` 38 + BytesReclaimed int64 `json:"bytes_reclaimed"` 39 + RecordsDeleted int64 `json:"records_deleted"` 40 + OrphanedRecords int64 `json:"orphaned_records"` 41 + OrphanedBlobs int64 `json:"orphaned_blobs"` 42 + ReferencedBlobs int64 `json:"referenced_blobs"` 43 + Duration time.Duration `json:"duration"` 44 + } 45 + 46 + // NewGarbageCollector creates a new GC instance 47 + func NewGarbageCollector(holdPDS *pds.HoldPDS, driver storagedriver.StorageDriver, cfg Config) *GarbageCollector { 48 + return &GarbageCollector{ 49 + pds: holdPDS, 50 + driver: driver, 51 + cfg: cfg, 52 + logger: slog.Default().With("component", "gc"), 53 + stopCh: make(chan struct{}), 54 + } 55 + } 56 + 57 + // Start begins the GC background process 58 + // It runs GC immediately on startup, then periodically according to gcInterval 59 + func (gc *GarbageCollector) Start(ctx context.Context) { 60 + if !gc.cfg.Enabled { 61 + gc.logger.Info("GC disabled") 62 + return 63 + } 64 + 65 + // Run on startup 66 + gc.logger.Info("Running GC on startup", "dryRun", gc.cfg.DryRun) 67 + result, err := gc.Run(ctx) 68 + if err != nil { 69 + gc.logger.Error("Startup GC failed", "error", err) 70 + } else { 71 + gc.logResult(result) 72 + } 73 + 74 + // Start background ticker for nightly runs 75 + gc.wg.Add(1) 76 + go func() { 77 + defer gc.wg.Done() 78 + 79 + ticker := time.NewTicker(gcInterval) 80 + defer ticker.Stop() 81 + 82 + for { 83 + select { 84 + case <-gc.stopCh: 85 + gc.logger.Info("GC background process stopped") 86 + return 87 + case <-ctx.Done(): 88 + gc.logger.Info("GC context cancelled") 89 + return 90 + case <-ticker.C: 91 + gc.logger.Info("Running nightly GC", "dryRun", gc.cfg.DryRun) 92 + result, err := gc.Run(ctx) 93 + if err != nil { 94 + gc.logger.Error("Nightly GC failed", "error", err) 95 + } else { 96 + gc.logResult(result) 97 + } 98 + } 99 + } 100 + }() 101 + 102 + gc.logger.Info("GC background process started", "interval", gcInterval) 103 + } 104 + 105 + // Stop gracefully stops the GC background process 106 + func (gc *GarbageCollector) Stop() { 107 + close(gc.stopCh) 108 + gc.wg.Wait() 109 + } 110 + 111 + // Run executes a single GC cycle 112 + func (gc *GarbageCollector) Run(ctx context.Context) (*GCResult, error) { 113 + start := time.Now() 114 + result := &GCResult{} 115 + 116 + gc.logger.Info("Starting GC run", "dryRun", gc.cfg.DryRun) 117 + 118 + // Phase 1: Build referenced set from layer records 119 + referenced, orphanedRecords, err := gc.buildReferencedSet(ctx, result) 120 + if err != nil { 121 + return nil, fmt.Errorf("phase 1 (build referenced set) failed: %w", err) 122 + } 123 + 124 + gc.logger.Info("Phase 1 complete",
125 + "referenced", len(referenced), 126 + "orphanedRecords", len(orphanedRecords)) 127 + 128 + // Phase 2: Delete orphaned layer records 129 + if err := gc.deleteOrphanedRecords(ctx, orphanedRecords, result); err != nil { 130 + gc.logger.Error("Phase 2 (delete orphaned records) failed", "error", err) 131 + // Continue to phase 3 - we can still clean up blobs 132 + } 133 + 134 + // Phase 3: Walk storage and delete unreferenced blobs 135 + if err := gc.deleteOrphanedBlobs(ctx, referenced, result); err != nil { 136 + return nil, fmt.Errorf("phase 3 (delete orphaned blobs) failed: %w", err) 137 + } 138 + 139 + result.Duration = time.Since(start) 140 + result.ReferencedBlobs = int64(len(referenced)) 141 + 142 + return result, nil 143 + } 144 + 145 + // buildReferencedSet iterates layer records and builds a set of referenced digests 146 + // Returns: referenced digest set, list of orphaned record rkeys, error 147 + func (gc *GarbageCollector) buildReferencedSet(ctx context.Context, result *GCResult) (map[string]bool, []string, error) { 148 + referenced := make(map[string]bool) 149 + var orphanedRecords []string 150 + 151 + recordsIndex := gc.pds.RecordsIndex() 152 + if recordsIndex == nil { 153 + return nil, nil, fmt.Errorf("records index not available") 154 + } 155 + 156 + cursor := "" 157 + batchSize := 1000 158 + totalRecords := 0 159 + 160 + for { 161 + records, nextCursor, err := recordsIndex.ListRecords(atproto.LayerCollection, batchSize, cursor, true) 162 + if err != nil { 163 + return nil, nil, fmt.Errorf("failed to list layer records: %w", err) 164 + } 165 + 166 + for _, rec := range records { 167 + totalRecords++ 168 + 169 + // Decode the layer record 170 + layer, err := gc.decodeLayerRecord(ctx, rec) 171 + if err != nil { 172 + gc.logger.Warn("Failed to decode layer record", "rkey", rec.Rkey, "error", err) 173 + continue 174 + } 175 + 176 + // Grace period: skip records from last 7 days 177 + recordTime := tidToTime(rec.Rkey) 178 + if time.Since(recordTime) < gcGracePeriod { 179 + // Recent record - assume referenced, skip checking 180 + referenced[layer.Digest] = true 181 + continue 182 + } 183 + 184 + // Cross-check: does the manifest still exist?
185 + if gc.manifestExists(ctx, layer.Manifest) { 186 + referenced[layer.Digest] = true 187 + } else { 188 + result.OrphanedRecords++ 189 + orphanedRecords = append(orphanedRecords, rec.Rkey) 190 + gc.logger.Debug("Found orphaned layer record", 191 + "rkey", rec.Rkey, 192 + "digest", layer.Digest, 193 + "manifest", layer.Manifest) 194 + } 195 + } 196 + 197 + if nextCursor == "" { 198 + break 199 + } 200 + cursor = nextCursor 201 + 202 + // Progress logging 203 + if totalRecords%10000 == 0 { 204 + gc.logger.Info("Phase 1 progress", "processed", totalRecords) 205 + } 206 + } 207 + 208 + gc.logger.Info("Scanned layer records", "total", totalRecords) 209 + return referenced, orphanedRecords, nil 210 + } 211 + 212 + // deleteOrphanedRecords removes layer records whose manifests no longer exist 213 + func (gc *GarbageCollector) deleteOrphanedRecords(ctx context.Context, orphanedRkeys []string, result *GCResult) error { 214 + for _, rkey := range orphanedRkeys { 215 + if gc.cfg.DryRun { 216 + gc.logger.Info("DRY-RUN: Would delete layer record", "rkey", rkey) 217 + } else { 218 + if err := gc.pds.DeleteLayerRecord(ctx, rkey); err != nil { 219 + gc.logger.Error("Failed to delete layer record", "rkey", rkey, "error", err) 220 + continue 221 + } 222 + result.RecordsDeleted++ 223 + gc.logger.Debug("Deleted orphaned layer record", "rkey", rkey) 224 + } 225 + } 226 + 227 + gc.logger.Info("Phase 2 complete", 228 + "orphaned", len(orphanedRkeys), 229 + "deleted", result.RecordsDeleted, 230 + "dryRun", gc.cfg.DryRun) 231 + 232 + return nil 233 + } 234 + 235 + // deleteOrphanedBlobs walks storage and deletes blobs not in the referenced set 236 + func (gc *GarbageCollector) deleteOrphanedBlobs(ctx context.Context, referenced map[string]bool, result *GCResult) error { 237 + blobsPath := "/docker/registry/v2/blobs" 238 + 239 + err := gc.driver.Walk(ctx, blobsPath, func(fi storagedriver.FileInfo) error { 240 + if fi.IsDir() { 241 + return nil 242 + } 243 + 244 + // Only process data files 245 + if !strings.HasSuffix(fi.Path(), "/data") { 246 + return nil 247 + } 248 + 249 + // Extract digest from path 250 + digest := extractDigestFromPath(fi.Path()) 251 + if digest == "" { 252 + return nil 253 + } 254 + 255 + // Check if referenced by any layer record 256 + if referenced[digest] { 257 + return nil 258 + } 259 + 260 + result.OrphanedBlobs++ 261 + 262 + if gc.cfg.DryRun { 263 + gc.logger.Info("DRY-RUN: Would delete blob", 264 + "digest", digest, 265 + "size", fi.Size()) 266 + } else { 267 + if err := gc.driver.Delete(ctx, fi.Path()); err != nil { 268 + gc.logger.Error("Failed to delete blob", "path", fi.Path(), "error", err) 269 + return nil // Continue with other blobs 270 + } 271 + result.BlobsDeleted++ 272 + result.BytesReclaimed += fi.Size() 273 + gc.logger.Debug("Deleted orphaned blob", 274 + "digest", digest, 275 + "size", fi.Size()) 276 + } 277 + 278 + return nil 279 + }) 280 + 281 + if err != nil { 282 + return fmt.Errorf("walk storage failed: %w", err) 283 + } 284 + 285 + gc.logger.Info("Phase 3 complete", 286 + "orphanedBlobs", result.OrphanedBlobs, 287 + "deleted", result.BlobsDeleted, 288 + "reclaimed", result.BytesReclaimed, 289 + "dryRun", gc.cfg.DryRun) 290 + 291 + return nil 292 + } 293 + 294 + // decodeLayerRecord reads and decodes a layer record from the PDS 295 + func (gc *GarbageCollector) decodeLayerRecord(ctx context.Context, rec pds.Record) (*atproto.LayerRecord, error) { 296 + // Get the record from the repo 297 + recordPath := rec.Collection + "/" + rec.Rkey 298 + _, recBytes, err := gc.pds.GetRecordBytes(ctx, recordPath)
299 + if err != nil { 300 + return nil, fmt.Errorf("get record bytes: %w", err) 301 + } 302 + 303 + // Decode the layer record 304 + var layer atproto.LayerRecord 305 + if err := layer.UnmarshalCBOR(bytes.NewReader(*recBytes)); err != nil { 306 + return nil, fmt.Errorf("unmarshal CBOR: %w", err) 307 + } 308 + 309 + return &layer, nil 310 + } 311 + 312 + // manifestExists checks if a manifest still exists at the given AT-URI 313 + func (gc *GarbageCollector) manifestExists(ctx context.Context, manifestURI string) bool { 314 + // Parse AT-URI: at://did:plc:xxx/io.atcr.manifest/abc123 315 + parts := parseATURI(manifestURI) 316 + if parts == nil { 317 + gc.logger.Debug("Could not parse manifest URI", "uri", manifestURI) 318 + return false // Can't parse, assume orphaned 319 + } 320 + 321 + // Check if the manifest record still exists via XRPC 322 + exists, err := gc.checkManifestViaXRPC(ctx, parts.DID, parts.Collection, parts.Rkey) 323 + if err != nil { 324 + // Network error - assume manifest exists (safe default) 325 + gc.logger.Warn("Failed to check manifest existence, assuming exists", 326 + "uri", manifestURI, 327 + "error", err) 328 + return true 329 + } 330 + 331 + return exists 332 + } 333 + 334 + // atURIParts contains parsed components of an AT-URI 335 + type atURIParts struct { 336 + DID string 337 + Collection string 338 + Rkey string 339 + } 340 + 341 + // parseATURI parses an AT-URI into its components 342 + // Format: at://did:plc:xxx/collection/rkey 343 + func parseATURI(uri string) *atURIParts { 344 + if !strings.HasPrefix(uri, "at://") { 345 + return nil 346 + } 347 + 348 + // Remove at:// prefix 349 + path := strings.TrimPrefix(uri, "at://") 350 + 351 + // Split by / 352 + parts := strings.SplitN(path, "/", 3) 353 + if len(parts) != 3 { 354 + return nil 355 + } 356 + 357 + return &atURIParts{ 358 + DID: parts[0], 359 + Collection: parts[1], 360 + Rkey: parts[2], 361 + } 362 + } 363 + 364 + // checkManifestViaXRPC checks if a manifest record exists by querying the user's PDS 365 + func (gc *GarbageCollector) checkManifestViaXRPC(ctx context.Context, did, collection, rkey string) (bool, error) { 366 + // Resolve DID to PDS endpoint 367 + pdsEndpoint, err := atproto.ResolveDIDToPDS(ctx, did) 368 + if err != nil { 369 + return false, fmt.Errorf("resolve PDS: %w", err) 370 + } 371 + 372 + // Build XRPC URL 373 + url := fmt.Sprintf("%s/xrpc/com.atproto.repo.getRecord?repo=%s&collection=%s&rkey=%s", 374 + pdsEndpoint, did, collection, rkey) 375 + 376 + // Make request with timeout 377 + client := &http.Client{Timeout: 10 * time.Second} 378 + req, err := http.NewRequestWithContext(ctx, "GET", url, nil) 379 + if err != nil { 380 + return false, fmt.Errorf("create request: %w", err) 381 + } 382 + 383 + resp, err := client.Do(req) 384 + if err != nil { 385 + return false, fmt.Errorf("http request: %w", err) 386 + } 387 + defer resp.Body.Close() 388 + 389 + // Consume body to allow connection reuse 390 + _, _ = io.Copy(io.Discard, resp.Body) 391 + 392 + switch resp.StatusCode { 393 + case http.StatusOK: 394 + return true, nil 395 + case http.StatusNotFound, http.StatusBadRequest: 396 + // Record doesn't exist 397 + return false, nil 398 + default: 399 + // Read error body for debugging 400 + body, _ := io.ReadAll(resp.Body) 401 + return false, fmt.Errorf("unexpected status %d: %s", resp.StatusCode, string(body)) 402 + } 403 + } 404 + 405 + // tidToTime extracts the timestamp from a TID (Timestamp ID) 406 + // TIDs are 13-character base32 encoded timestamps with counter
407 + func tidToTime(tid string) time.Time { 408 + // TIDs are base32-sortable timestamps 409 + // Use indigo's syntax package for proper parsing 410 + t, err := syntax.ParseTID(tid) 411 + if err != nil { 412 + // Return zero time - will be older than grace period 413 + return time.Time{} 414 + } 415 + return t.Time() 416 + } 417 + 418 + // extractDigestFromPath extracts a digest from a storage path 419 + // Path format: /docker/registry/v2/blobs/{algorithm}/{xx}/{hash}/data 420 + // Returns: {algorithm}:{hash} 421 + func extractDigestFromPath(path string) string { 422 + // Match pattern: /blobs/{alg}/{xx}/{hash}/data 423 + re := regexp.MustCompile(`/blobs/([^/]+)/[^/]+/([^/]+)/data$`) 424 + matches := re.FindStringSubmatch(path) 425 + if len(matches) != 3 { 426 + return "" 427 + } 428 + return matches[1] + ":" + matches[2] 429 + } 430 + 431 + // logResult logs the GC result in a structured format 432 + func (gc *GarbageCollector) logResult(result *GCResult) { 433 + gc.logger.Info("GC run complete", 434 + "duration", result.Duration, 435 + "referencedBlobs", result.ReferencedBlobs, 436 + "orphanedRecords", result.OrphanedRecords, 437 + "recordsDeleted", result.RecordsDeleted, 438 + "orphanedBlobs", result.OrphanedBlobs, 439 + "blobsDeleted", result.BlobsDeleted, 440 + "bytesReclaimed", result.BytesReclaimed, 441 + "dryRun", gc.cfg.DryRun) 442 + 443 + // Also log as JSON for easier parsing 444 + resultJSON, _ := json.Marshal(result) 445 + gc.logger.Debug("GC result JSON", "result", string(resultJSON)) 446 + }
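Beyond the startup-plus-nightly scheduling in Start, Run can also be driven directly for a one-off pass, for example from a test or an ad-hoc admin command. A rough sketch, where holdPDS and driver stand in for the values cmd/hold/main.go already constructs:

// One-off GC pass; this does not start the background ticker.
cfg := gc.Config{Enabled: true, DryRun: true} // keep dry-run on for a manual audit
collector := gc.NewGarbageCollector(holdPDS, driver, cfg)
result, err := collector.Run(context.Background())
if err != nil {
    log.Fatalf("gc run failed: %v", err)
}
fmt.Printf("orphaned blobs: %d, would reclaim %d bytes\n", result.OrphanedBlobs, result.BytesReclaimed)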
+228
pkg/hold/gc/gc_test.go
···
··· 1 + package gc 2 + 3 + import ( 4 + "testing" 5 + "time" 6 + ) 7 + 8 + func TestExtractDigestFromPath(t *testing.T) { 9 + tests := []struct { 10 + name string 11 + path string 12 + expected string 13 + }{ 14 + { 15 + name: "valid sha256 path", 16 + path: "/docker/registry/v2/blobs/sha256/ab/abc123def456/data", 17 + expected: "sha256:abc123def456", 18 + }, 19 + { 20 + name: "valid sha256 path with full hash", 21 + path: "/docker/registry/v2/blobs/sha256/e3/e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855/data", 22 + expected: "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", 23 + }, 24 + { 25 + name: "invalid path - no data suffix", 26 + path: "/docker/registry/v2/blobs/sha256/ab/abc123def456", 27 + expected: "", 28 + }, 29 + { 30 + name: "invalid path - wrong structure", 31 + path: "/some/other/path/data", 32 + expected: "", 33 + }, 34 + { 35 + name: "empty path", 36 + path: "", 37 + expected: "", 38 + }, 39 + { 40 + name: "uploads temp path (should not match)", 41 + path: "/docker/registry/v2/uploads/temp-uuid/data", 42 + expected: "", 43 + }, 44 + } 45 + 46 + for _, tt := range tests { 47 + t.Run(tt.name, func(t *testing.T) { 48 + result := extractDigestFromPath(tt.path) 49 + if result != tt.expected { 50 + t.Errorf("extractDigestFromPath(%q) = %q, want %q", tt.path, result, tt.expected) 51 + } 52 + }) 53 + } 54 + } 55 + 56 + func TestParseATURI(t *testing.T) { 57 + tests := []struct { 58 + name string 59 + uri string 60 + expectNil bool 61 + did string 62 + collection string 63 + rkey string 64 + }{ 65 + { 66 + name: "valid AT-URI", 67 + uri: "at://did:plc:abc123/io.atcr.manifest/xyz789", 68 + expectNil: false, 69 + did: "did:plc:abc123", 70 + collection: "io.atcr.manifest", 71 + rkey: "xyz789", 72 + }, 73 + { 74 + name: "valid AT-URI with did:web", 75 + uri: "at://did:web:example.com/io.atcr.manifest/manifest123", 76 + expectNil: false, 77 + did: "did:web:example.com", 78 + collection: "io.atcr.manifest", 79 + rkey: "manifest123", 80 + }, 81 + { 82 + name: "invalid - no at:// prefix", 83 + uri: "did:plc:abc123/io.atcr.manifest/xyz789", 84 + expectNil: true, 85 + }, 86 + { 87 + name: "invalid - missing rkey", 88 + uri: "at://did:plc:abc123/io.atcr.manifest", 89 + expectNil: true, 90 + }, 91 + { 92 + name: "invalid - empty string", 93 + uri: "", 94 + expectNil: true, 95 + }, 96 + { 97 + name: "invalid - http URL", 98 + uri: "https://example.com/xrpc/com.atproto.repo.getRecord", 99 + expectNil: true, 100 + }, 101 + } 102 + 103 + for _, tt := range tests { 104 + t.Run(tt.name, func(t *testing.T) { 105 + result := parseATURI(tt.uri) 106 + if tt.expectNil { 107 + if result != nil { 108 + t.Errorf("parseATURI(%q) = %+v, want nil", tt.uri, result) 109 + } 110 + return 111 + } 112 + 113 + if result == nil { 114 + t.Errorf("parseATURI(%q) = nil, want non-nil", tt.uri) 115 + return 116 + } 117 + 118 + if result.DID != tt.did { 119 + t.Errorf("parseATURI(%q).DID = %q, want %q", tt.uri, result.DID, tt.did) 120 + } 121 + if result.Collection != tt.collection { 122 + t.Errorf("parseATURI(%q).Collection = %q, want %q", tt.uri, result.Collection, tt.collection) 123 + } 124 + if result.Rkey != tt.rkey { 125 + t.Errorf("parseATURI(%q).Rkey = %q, want %q", tt.uri, result.Rkey, tt.rkey) 126 + } 127 + }) 128 + } 129 + } 130 + 131 + func TestTidToTime(t *testing.T) { 132 + // Test with known TID format 133 + // TIDs are base32-encoded timestamps with counter 134 + tests := []struct { 135 + name string 136 + tid string 137 + expectZero bool 138 + minAge time.Duration // Minimum expected age (roughly)
139 + }{ 140 + { 141 + name: "valid TID from 2024", 142 + tid: "3l7nqy25tks2c", // A real TID from around 2024 143 + expectZero: false, 144 + }, 145 + { 146 + name: "invalid TID - too short", 147 + tid: "abc", 148 + expectZero: true, 149 + }, 150 + { 151 + name: "invalid TID - empty", 152 + tid: "", 153 + expectZero: true, 154 + }, 155 + { 156 + name: "invalid TID - not base32", 157 + tid: "!!!!!!!!!!!!!!", 158 + expectZero: true, 159 + }, 160 + } 161 + 162 + for _, tt := range tests { 163 + t.Run(tt.name, func(t *testing.T) { 164 + result := tidToTime(tt.tid) 165 + if tt.expectZero { 166 + if !result.IsZero() { 167 + t.Errorf("tidToTime(%q) = %v, want zero time", tt.tid, result) 168 + } 169 + return 170 + } 171 + 172 + if result.IsZero() { 173 + t.Errorf("tidToTime(%q) = zero time, want non-zero", tt.tid) 174 + } 175 + }) 176 + } 177 + } 178 + 179 + func TestLoadConfigFromEnv(t *testing.T) { 180 + // Test default values 181 + t.Run("default values", func(t *testing.T) { 182 + // Clear any existing env vars 183 + t.Setenv("GC_ENABLED", "") 184 + t.Setenv("GC_DRY_RUN", "") 185 + 186 + cfg := LoadConfigFromEnv() 187 + 188 + // Default: enabled 189 + if !cfg.Enabled { 190 + t.Error("expected Enabled to be true by default") 191 + } 192 + 193 + // Default: dry run enabled 194 + if !cfg.DryRun { 195 + t.Error("expected DryRun to be true by default") 196 + } 197 + }) 198 + 199 + t.Run("disabled via env", func(t *testing.T) { 200 + t.Setenv("GC_ENABLED", "false") 201 + t.Setenv("GC_DRY_RUN", "false") 202 + 203 + cfg := LoadConfigFromEnv() 204 + 205 + if cfg.Enabled { 206 + t.Error("expected Enabled to be false when GC_ENABLED=false") 207 + } 208 + 209 + if cfg.DryRun { 210 + t.Error("expected DryRun to be false when GC_DRY_RUN=false") 211 + } 212 + }) 213 + 214 + t.Run("enabled via env", func(t *testing.T) { 215 + t.Setenv("GC_ENABLED", "true") 216 + t.Setenv("GC_DRY_RUN", "true") 217 + 218 + cfg := LoadConfigFromEnv() 219 + 220 + if !cfg.Enabled { 221 + t.Error("expected Enabled to be true when GC_ENABLED=true") 222 + } 223 + 224 + if !cfg.DryRun { 225 + t.Error("expected DryRun to be true when GC_DRY_RUN=true") 226 + } 227 + }) 228 + }
+19
pkg/hold/pds/layer.go
··· 49 return nil, fmt.Errorf("GetLayerRecord not yet implemented - use via XRPC listRecords instead") 50 } 51 52 // ListLayerRecords lists layer records with pagination 53 // Returns records, next cursor (empty if no more), and error 54 // Note: This is a simplified implementation. For production, consider adding filters
··· 49 return nil, fmt.Errorf("GetLayerRecord not yet implemented - use via XRPC listRecords instead") 50 } 51 52 + // DeleteLayerRecord deletes a layer record by rkey 53 + // This deletes from both the repo (MST) and the records index 54 + func (p *HoldPDS) DeleteLayerRecord(ctx context.Context, rkey string) error { 55 + // Delete from repo (MST) 56 + if err := p.repomgr.DeleteRecord(ctx, p.uid, atproto.LayerCollection, rkey); err != nil { 57 + return fmt.Errorf("failed to delete from repo: %w", err) 58 + } 59 + 60 + // Delete from index 61 + if p.recordsIndex != nil { 62 + if err := p.recordsIndex.DeleteRecord(atproto.LayerCollection, rkey); err != nil { 63 + // Log but don't fail - index will resync on backfill 64 + fmt.Printf("Warning: failed to delete from records index: %v\n", err) 65 + } 66 + } 67 + 68 + return nil 69 + } 70 + 71 // ListLayerRecords lists layer records with pagination 72 // Returns records, next cursor (empty if no more), and error 73 // Note: This is a simplified implementation. For production, consider adding filters
+30
pkg/hold/pds/server.go
··· 152 return p.uid 153 } 154 155 // Bootstrap initializes the hold with the captain record, owner as first crew member, and profile 156 func (p *HoldPDS) Bootstrap(ctx context.Context, storageDriver driver.StorageDriver, ownerDID string, public bool, allowAllCrew bool, avatarURL, region string) error { 157 if ownerDID == "" {
··· 152 return p.uid 153 } 154 155 + // GetRecordBytes retrieves raw CBOR bytes for a record 156 + // recordPath format: "collection/rkey" 157 + func (p *HoldPDS) GetRecordBytes(ctx context.Context, recordPath string) (cid.Cid, *[]byte, error) { 158 + session, err := p.carstore.ReadOnlySession(p.uid) 159 + if err != nil { 160 + return cid.Undef, nil, fmt.Errorf("failed to create session: %w", err) 161 + } 162 + 163 + head, err := p.carstore.GetUserRepoHead(ctx, p.uid) 164 + if err != nil { 165 + return cid.Undef, nil, fmt.Errorf("failed to get repo head: %w", err) 166 + } 167 + 168 + if !head.Defined() { 169 + return cid.Undef, nil, fmt.Errorf("repo is empty") 170 + } 171 + 172 + repoHandle, err := repo.OpenRepo(ctx, session, head) 173 + if err != nil { 174 + return cid.Undef, nil, fmt.Errorf("failed to open repo: %w", err) 175 + } 176 + 177 + recordCID, recBytes, err := repoHandle.GetRecordBytes(ctx, recordPath) 178 + if err != nil { 179 + return cid.Undef, nil, fmt.Errorf("failed to get record: %w", err) 180 + } 181 + 182 + return recordCID, recBytes, nil 183 + } 184 + 185 // Bootstrap initializes the hold with the captain record, owner as first crew member, and profile 186 func (p *HoldPDS) Bootstrap(ctx context.Context, storageDriver driver.StorageDriver, ownerDID string, public bool, allowAllCrew bool, avatarURL, region string) error { 187 if ownerDID == "" {