A container registry that uses the AT Protocol for manifest storage and S3 for blob storage. atcr.io
docker container atproto go

remove duplicate data from exporter

evan.jarrett.net 263ec4b7 ab7e7c7a

verified
Changed files
+56 -140
pkg
appview
db
handlers
-108
pkg/appview/db/export.go
··· 4 "database/sql" 5 "fmt" 6 "time" 7 - 8 - "atcr.io/pkg/atproto" 9 ) 10 11 // UserDataExport represents the GDPR-compliant data export for a user ··· 18 OAuthSessions []OAuthSessionExport `json:"oauth_sessions"` 19 UISessions []UISessionExport `json:"ui_sessions"` 20 HoldMemberships HoldMembershipsExport `json:"hold_memberships"` 21 - KnownHolds KnownHoldsExport `json:"known_holds"` 22 CachedDataNote CachedDataNote `json:"cached_data_note"` 23 } 24 ··· 68 LastDeniedAt time.Time `json:"last_denied_at"` 69 } 70 71 - // KnownHoldsExport lists holds where the user has interacted 72 - type KnownHoldsExport struct { 73 - Note string `json:"note"` 74 - Holds []KnownHoldExport `json:"holds"` 75 - } 76 - 77 - // KnownHoldExport represents a hold the user has interacted with 78 - type KnownHoldExport struct { 79 - HoldDID string `json:"hold_did"` 80 - Relationship string `json:"relationship"` // "captain", "crew_member" 81 - FirstSeen time.Time `json:"first_seen"` 82 - ExportEndpoint string `json:"export_endpoint"` 83 - } 84 85 // CachedDataNote explains what cached data exists and how to access it 86 type CachedDataNote struct { ··· 126 return nil, fmt.Errorf("failed to get hold memberships: %w", err) 127 } 128 export.HoldMemberships = memberships 129 - 130 - // Get known holds (where user is captain or crew) 131 - knownHolds, err := getKnownHoldsForExport(db, did) 132 - if err != nil { 133 - return nil, fmt.Errorf("failed to get known holds: %w", err) 134 - } 135 - export.KnownHolds = knownHolds 136 137 // Add cached data note 138 export.CachedDataNote = CachedDataNote{ ··· 306 307 return memberships, denialRows.Err() 308 } 309 - 310 - // getKnownHoldsForExport retrieves holds where user is captain or crew member 311 - func getKnownHoldsForExport(db *sql.DB, did string) (KnownHoldsExport, error) { 312 - known := KnownHoldsExport{ 313 - Note: "Hold services where you have interacted. Each hold stores its own records about you. Contact each hold directly to export that data.", 314 - Holds: []KnownHoldExport{}, 315 - } 316 - 317 - // Get holds where user is captain 318 - captainRows, err := db.Query(` 319 - SELECT hold_did, updated_at 320 - FROM hold_captain_records 321 - WHERE owner_did = ? 322 - ORDER BY updated_at DESC 323 - `, did) 324 - if err != nil { 325 - return known, err 326 - } 327 - defer captainRows.Close() 328 - 329 - for captainRows.Next() { 330 - var holdDID string 331 - var updatedAt time.Time 332 - err := captainRows.Scan(&holdDID, &updatedAt) 333 - if err != nil { 334 - return known, err 335 - } 336 - known.Holds = append(known.Holds, KnownHoldExport{ 337 - HoldDID: holdDID, 338 - Relationship: "captain", 339 - FirstSeen: updatedAt, 340 - ExportEndpoint: resolveHoldExportEndpoint(holdDID), 341 - }) 342 - } 343 - if err := captainRows.Err(); err != nil { 344 - return known, err 345 - } 346 - 347 - // Get holds where user is crew member 348 - crewRows, err := db.Query(` 349 - SELECT hold_did, created_at 350 - FROM hold_crew_members 351 - WHERE member_did = ? 352 - ORDER BY created_at DESC 353 - `, did) 354 - if err != nil { 355 - return known, err 356 - } 357 - defer crewRows.Close() 358 - 359 - for crewRows.Next() { 360 - var holdDID string 361 - var createdAt time.Time 362 - err := crewRows.Scan(&holdDID, &createdAt) 363 - if err != nil { 364 - return known, err 365 - } 366 - 367 - // Check if already added as captain 368 - alreadyAdded := false 369 - for _, h := range known.Holds { 370 - if h.HoldDID == holdDID { 371 - alreadyAdded = true 372 - break 373 - } 374 - } 375 - 376 - if !alreadyAdded { 377 - known.Holds = append(known.Holds, KnownHoldExport{ 378 - HoldDID: holdDID, 379 - Relationship: "crew_member", 380 - FirstSeen: createdAt, 381 - ExportEndpoint: resolveHoldExportEndpoint(holdDID), 382 - }) 383 - } 384 - } 385 - 386 - return known, crewRows.Err() 387 - } 388 - 389 - // resolveHoldExportEndpoint converts a hold DID to its export endpoint URL 390 - // Uses the shared ResolveHoldURL for did:web resolution 391 - func resolveHoldExportEndpoint(holdDID string) string { 392 - return atproto.ResolveHoldURL(holdDID) + atproto.HoldExportUserData 393 - }
··· 4 "database/sql" 5 "fmt" 6 "time" 7 ) 8 9 // UserDataExport represents the GDPR-compliant data export for a user ··· 16 OAuthSessions []OAuthSessionExport `json:"oauth_sessions"` 17 UISessions []UISessionExport `json:"ui_sessions"` 18 HoldMemberships HoldMembershipsExport `json:"hold_memberships"` 19 CachedDataNote CachedDataNote `json:"cached_data_note"` 20 } 21 ··· 65 LastDeniedAt time.Time `json:"last_denied_at"` 66 } 67 68 69 // CachedDataNote explains what cached data exists and how to access it 70 type CachedDataNote struct { ··· 110 return nil, fmt.Errorf("failed to get hold memberships: %w", err) 111 } 112 export.HoldMemberships = memberships 113 114 // Add cached data note 115 export.CachedDataNote = CachedDataNote{ ··· 283 284 return memberships, denialRows.Err() 285 }
+56 -32
pkg/appview/handlers/export.go
··· 20 21 // HoldExportResult represents the result of fetching export from a hold 22 type HoldExportResult struct { 23 - HoldDID string `json:"hold_did"` 24 - Endpoint string `json:"endpoint"` 25 - Status string `json:"status"` // "success", "failed", "offline" 26 - Error string `json:"error,omitempty"` 27 - Data json.RawMessage `json:"data,omitempty"` // Raw JSON from hold 28 } 29 30 // FullUserDataExport represents the complete GDPR export including hold data ··· 86 "hold_count", len(holdExports)) 87 } 88 89 // fetchHoldExports fetches export data from all holds where user is a member 90 func (h *ExportUserDataHandler) fetchHoldExports(ctx context.Context, user *db.User) []HoldExportResult { 91 var results []HoldExportResult 92 93 // Get crew memberships from database 94 memberships, err := db.GetCrewMemberships(h.DB, user.DID) 95 if err != nil { ··· 97 "component", "export", 98 "did", user.DID, 99 "error", err) 100 - return results 101 } 102 103 - if len(memberships) == 0 { 104 return results 105 } 106 107 - // Collect unique hold DIDs 108 - holdDIDs := make(map[string]bool) 109 - for _, m := range memberships { 110 - holdDIDs[m.HoldDID] = true 111 - } 112 - 113 - // Also check captain records (holds owned by user) 114 - if h.DB != nil { 115 - captainHolds, err := db.GetCaptainRecordsForOwner(h.DB, user.DID) 116 - if err == nil { 117 - for _, hold := range captainHolds { 118 - holdDIDs[hold.HoldDID] = true 119 - } 120 - } 121 - } 122 - 123 // Fetch from each hold concurrently with timeout 124 var wg sync.WaitGroup 125 - resultChan := make(chan HoldExportResult, len(holdDIDs)) 126 127 - for holdDID := range holdDIDs { 128 wg.Add(1) 129 - go func(holdDID string) { 130 defer wg.Done() 131 - result := h.fetchSingleHoldExport(ctx, user, holdDID) 132 resultChan <- result 133 - }(holdDID) 134 } 135 136 // Wait for all goroutines to complete ··· 146 } 147 148 // fetchSingleHoldExport fetches export data from a single hold 149 - func (h *ExportUserDataHandler) fetchSingleHoldExport(ctx context.Context, user *db.User, holdDID string) HoldExportResult { 150 // Resolve hold DID to URL 151 holdURL := atproto.ResolveHoldURL(holdDID) 152 endpoint := holdURL + "/xrpc/io.atcr.hold.exportUserData" 153 154 result := HoldExportResult{ 155 - HoldDID: holdDID, 156 - Endpoint: endpoint, 157 - Status: "failed", 158 } 159 160 // Check if we have OAuth refresher (needed for service tokens)
··· 20 21 // HoldExportResult represents the result of fetching export from a hold 22 type HoldExportResult struct { 23 + HoldDID string `json:"hold_did"` 24 + Endpoint string `json:"endpoint"` 25 + Relationship string `json:"relationship"` // "captain", "crew_member" 26 + FirstSeen time.Time `json:"first_seen"` 27 + Status string `json:"status"` // "success", "failed", "offline" 28 + Error string `json:"error,omitempty"` 29 + Data json.RawMessage `json:"data,omitempty"` // Raw JSON from hold 30 } 31 32 // FullUserDataExport represents the complete GDPR export including hold data ··· 88 "hold_count", len(holdExports)) 89 } 90 91 + // holdMetadata stores relationship info for a hold 92 + type holdMetadata struct { 93 + relationship string 94 + firstSeen time.Time 95 + } 96 + 97 // fetchHoldExports fetches export data from all holds where user is a member 98 func (h *ExportUserDataHandler) fetchHoldExports(ctx context.Context, user *db.User) []HoldExportResult { 99 var results []HoldExportResult 100 101 + // Build metadata map: holdDID → (relationship, firstSeen) 102 + holdMeta := make(map[string]holdMetadata) 103 + 104 + // Get holds where user is captain 105 + if h.DB != nil { 106 + captainHolds, err := db.GetCaptainRecordsForOwner(h.DB, user.DID) 107 + if err != nil { 108 + slog.Warn("Failed to get captain records for export", 109 + "component", "export", 110 + "did", user.DID, 111 + "error", err) 112 + } else { 113 + for _, hold := range captainHolds { 114 + holdMeta[hold.HoldDID] = holdMetadata{ 115 + relationship: "captain", 116 + firstSeen: hold.UpdatedAt, 117 + } 118 + } 119 + } 120 + } 121 + 122 // Get crew memberships from database 123 memberships, err := db.GetCrewMemberships(h.DB, user.DID) 124 if err != nil { ··· 126 "component", "export", 127 "did", user.DID, 128 "error", err) 129 + } else { 130 + for _, m := range memberships { 131 + // Don't overwrite captain relationship 132 + if _, exists := holdMeta[m.HoldDID]; !exists { 133 + holdMeta[m.HoldDID] = holdMetadata{ 134 + relationship: "crew_member", 135 + firstSeen: m.CreatedAt, 136 + } 137 + } 138 + } 139 } 140 141 + if len(holdMeta) == 0 { 142 return results 143 } 144 145 // Fetch from each hold concurrently with timeout 146 var wg sync.WaitGroup 147 + resultChan := make(chan HoldExportResult, len(holdMeta)) 148 149 + for holdDID, meta := range holdMeta { 150 wg.Add(1) 151 + go func(holdDID string, meta holdMetadata) { 152 defer wg.Done() 153 + result := h.fetchSingleHoldExport(ctx, user, holdDID, meta) 154 resultChan <- result 155 + }(holdDID, meta) 156 } 157 158 // Wait for all goroutines to complete ··· 168 } 169 170 // fetchSingleHoldExport fetches export data from a single hold 171 + func (h *ExportUserDataHandler) fetchSingleHoldExport(ctx context.Context, user *db.User, holdDID string, meta holdMetadata) HoldExportResult { 172 // Resolve hold DID to URL 173 holdURL := atproto.ResolveHoldURL(holdDID) 174 endpoint := holdURL + "/xrpc/io.atcr.hold.exportUserData" 175 176 result := HoldExportResult{ 177 + HoldDID: holdDID, 178 + Endpoint: endpoint, 179 + Relationship: meta.relationship, 180 + FirstSeen: meta.firstSeen, 181 + Status: "failed", 182 } 183 184 // Check if we have OAuth refresher (needed for service tokens)