A container registry that uses the AT Protocol for manifest storage and S3 for blob storage. atcr.io
docker container atproto go

remove duplicate data from exporter

evan.jarrett.net 263ec4b7 ab7e7c7a

verified
Changed files
+56 -140
pkg
appview
db
handlers
-108
pkg/appview/db/export.go
··· 4 4 "database/sql" 5 5 "fmt" 6 6 "time" 7 - 8 - "atcr.io/pkg/atproto" 9 7 ) 10 8 11 9 // UserDataExport represents the GDPR-compliant data export for a user ··· 18 16 OAuthSessions []OAuthSessionExport `json:"oauth_sessions"` 19 17 UISessions []UISessionExport `json:"ui_sessions"` 20 18 HoldMemberships HoldMembershipsExport `json:"hold_memberships"` 21 - KnownHolds KnownHoldsExport `json:"known_holds"` 22 19 CachedDataNote CachedDataNote `json:"cached_data_note"` 23 20 } 24 21 ··· 68 65 LastDeniedAt time.Time `json:"last_denied_at"` 69 66 } 70 67 71 - // KnownHoldsExport lists holds where the user has interacted 72 - type KnownHoldsExport struct { 73 - Note string `json:"note"` 74 - Holds []KnownHoldExport `json:"holds"` 75 - } 76 - 77 - // KnownHoldExport represents a hold the user has interacted with 78 - type KnownHoldExport struct { 79 - HoldDID string `json:"hold_did"` 80 - Relationship string `json:"relationship"` // "captain", "crew_member" 81 - FirstSeen time.Time `json:"first_seen"` 82 - ExportEndpoint string `json:"export_endpoint"` 83 - } 84 68 85 69 // CachedDataNote explains what cached data exists and how to access it 86 70 type CachedDataNote struct { ··· 126 110 return nil, fmt.Errorf("failed to get hold memberships: %w", err) 127 111 } 128 112 export.HoldMemberships = memberships 129 - 130 - // Get known holds (where user is captain or crew) 131 - knownHolds, err := getKnownHoldsForExport(db, did) 132 - if err != nil { 133 - return nil, fmt.Errorf("failed to get known holds: %w", err) 134 - } 135 - export.KnownHolds = knownHolds 136 113 137 114 // Add cached data note 138 115 export.CachedDataNote = CachedDataNote{ ··· 306 283 307 284 return memberships, denialRows.Err() 308 285 } 309 - 310 - // getKnownHoldsForExport retrieves holds where user is captain or crew member 311 - func getKnownHoldsForExport(db *sql.DB, did string) (KnownHoldsExport, error) { 312 - known := KnownHoldsExport{ 313 - Note: "Hold services where you have interacted. Each hold stores its own records about you. Contact each hold directly to export that data.", 314 - Holds: []KnownHoldExport{}, 315 - } 316 - 317 - // Get holds where user is captain 318 - captainRows, err := db.Query(` 319 - SELECT hold_did, updated_at 320 - FROM hold_captain_records 321 - WHERE owner_did = ? 322 - ORDER BY updated_at DESC 323 - `, did) 324 - if err != nil { 325 - return known, err 326 - } 327 - defer captainRows.Close() 328 - 329 - for captainRows.Next() { 330 - var holdDID string 331 - var updatedAt time.Time 332 - err := captainRows.Scan(&holdDID, &updatedAt) 333 - if err != nil { 334 - return known, err 335 - } 336 - known.Holds = append(known.Holds, KnownHoldExport{ 337 - HoldDID: holdDID, 338 - Relationship: "captain", 339 - FirstSeen: updatedAt, 340 - ExportEndpoint: resolveHoldExportEndpoint(holdDID), 341 - }) 342 - } 343 - if err := captainRows.Err(); err != nil { 344 - return known, err 345 - } 346 - 347 - // Get holds where user is crew member 348 - crewRows, err := db.Query(` 349 - SELECT hold_did, created_at 350 - FROM hold_crew_members 351 - WHERE member_did = ? 352 - ORDER BY created_at DESC 353 - `, did) 354 - if err != nil { 355 - return known, err 356 - } 357 - defer crewRows.Close() 358 - 359 - for crewRows.Next() { 360 - var holdDID string 361 - var createdAt time.Time 362 - err := crewRows.Scan(&holdDID, &createdAt) 363 - if err != nil { 364 - return known, err 365 - } 366 - 367 - // Check if already added as captain 368 - alreadyAdded := false 369 - for _, h := range known.Holds { 370 - if h.HoldDID == holdDID { 371 - alreadyAdded = true 372 - break 373 - } 374 - } 375 - 376 - if !alreadyAdded { 377 - known.Holds = append(known.Holds, KnownHoldExport{ 378 - HoldDID: holdDID, 379 - Relationship: "crew_member", 380 - FirstSeen: createdAt, 381 - ExportEndpoint: resolveHoldExportEndpoint(holdDID), 382 - }) 383 - } 384 - } 385 - 386 - return known, crewRows.Err() 387 - } 388 - 389 - // resolveHoldExportEndpoint converts a hold DID to its export endpoint URL 390 - // Uses the shared ResolveHoldURL for did:web resolution 391 - func resolveHoldExportEndpoint(holdDID string) string { 392 - return atproto.ResolveHoldURL(holdDID) + atproto.HoldExportUserData 393 - }
+56 -32
pkg/appview/handlers/export.go
··· 20 20 21 21 // HoldExportResult represents the result of fetching export from a hold 22 22 type HoldExportResult struct { 23 - HoldDID string `json:"hold_did"` 24 - Endpoint string `json:"endpoint"` 25 - Status string `json:"status"` // "success", "failed", "offline" 26 - Error string `json:"error,omitempty"` 27 - Data json.RawMessage `json:"data,omitempty"` // Raw JSON from hold 23 + HoldDID string `json:"hold_did"` 24 + Endpoint string `json:"endpoint"` 25 + Relationship string `json:"relationship"` // "captain", "crew_member" 26 + FirstSeen time.Time `json:"first_seen"` 27 + Status string `json:"status"` // "success", "failed", "offline" 28 + Error string `json:"error,omitempty"` 29 + Data json.RawMessage `json:"data,omitempty"` // Raw JSON from hold 28 30 } 29 31 30 32 // FullUserDataExport represents the complete GDPR export including hold data ··· 86 88 "hold_count", len(holdExports)) 87 89 } 88 90 91 + // holdMetadata stores relationship info for a hold 92 + type holdMetadata struct { 93 + relationship string 94 + firstSeen time.Time 95 + } 96 + 89 97 // fetchHoldExports fetches export data from all holds where user is a member 90 98 func (h *ExportUserDataHandler) fetchHoldExports(ctx context.Context, user *db.User) []HoldExportResult { 91 99 var results []HoldExportResult 92 100 101 + // Build metadata map: holdDID → (relationship, firstSeen) 102 + holdMeta := make(map[string]holdMetadata) 103 + 104 + // Get holds where user is captain 105 + if h.DB != nil { 106 + captainHolds, err := db.GetCaptainRecordsForOwner(h.DB, user.DID) 107 + if err != nil { 108 + slog.Warn("Failed to get captain records for export", 109 + "component", "export", 110 + "did", user.DID, 111 + "error", err) 112 + } else { 113 + for _, hold := range captainHolds { 114 + holdMeta[hold.HoldDID] = holdMetadata{ 115 + relationship: "captain", 116 + firstSeen: hold.UpdatedAt, 117 + } 118 + } 119 + } 120 + } 121 + 93 122 // Get crew memberships from database 94 123 memberships, err := db.GetCrewMemberships(h.DB, user.DID) 95 124 if err != nil { ··· 97 126 "component", "export", 98 127 "did", user.DID, 99 128 "error", err) 100 - return results 129 + } else { 130 + for _, m := range memberships { 131 + // Don't overwrite captain relationship 132 + if _, exists := holdMeta[m.HoldDID]; !exists { 133 + holdMeta[m.HoldDID] = holdMetadata{ 134 + relationship: "crew_member", 135 + firstSeen: m.CreatedAt, 136 + } 137 + } 138 + } 101 139 } 102 140 103 - if len(memberships) == 0 { 141 + if len(holdMeta) == 0 { 104 142 return results 105 143 } 106 144 107 - // Collect unique hold DIDs 108 - holdDIDs := make(map[string]bool) 109 - for _, m := range memberships { 110 - holdDIDs[m.HoldDID] = true 111 - } 112 - 113 - // Also check captain records (holds owned by user) 114 - if h.DB != nil { 115 - captainHolds, err := db.GetCaptainRecordsForOwner(h.DB, user.DID) 116 - if err == nil { 117 - for _, hold := range captainHolds { 118 - holdDIDs[hold.HoldDID] = true 119 - } 120 - } 121 - } 122 - 123 145 // Fetch from each hold concurrently with timeout 124 146 var wg sync.WaitGroup 125 - resultChan := make(chan HoldExportResult, len(holdDIDs)) 147 + resultChan := make(chan HoldExportResult, len(holdMeta)) 126 148 127 - for holdDID := range holdDIDs { 149 + for holdDID, meta := range holdMeta { 128 150 wg.Add(1) 129 - go func(holdDID string) { 151 + go func(holdDID string, meta holdMetadata) { 130 152 defer wg.Done() 131 - result := h.fetchSingleHoldExport(ctx, user, holdDID) 153 + result := h.fetchSingleHoldExport(ctx, user, holdDID, meta) 132 154 resultChan <- result 133 - }(holdDID) 155 + }(holdDID, meta) 134 156 } 135 157 136 158 // Wait for all goroutines to complete ··· 146 168 } 147 169 148 170 // fetchSingleHoldExport fetches export data from a single hold 149 - func (h *ExportUserDataHandler) fetchSingleHoldExport(ctx context.Context, user *db.User, holdDID string) HoldExportResult { 171 + func (h *ExportUserDataHandler) fetchSingleHoldExport(ctx context.Context, user *db.User, holdDID string, meta holdMetadata) HoldExportResult { 150 172 // Resolve hold DID to URL 151 173 holdURL := atproto.ResolveHoldURL(holdDID) 152 174 endpoint := holdURL + "/xrpc/io.atcr.hold.exportUserData" 153 175 154 176 result := HoldExportResult{ 155 - HoldDID: holdDID, 156 - Endpoint: endpoint, 157 - Status: "failed", 177 + HoldDID: holdDID, 178 + Endpoint: endpoint, 179 + Relationship: meta.relationship, 180 + FirstSeen: meta.firstSeen, 181 + Status: "failed", 158 182 } 159 183 160 184 // Check if we have OAuth refresher (needed for service tokens)