A third party ATProto appview

adding PDS level backfill for future relay banned user bypass

+3 -1
.claude/settings.local.json
··· 20 "Bash(npm:*)", 21 "Bash(docker-compose restart:*)", 22 "Bash(echo \"# Bluesky Branding Removal Checklist\n\n## 1. App Icons & Images\n- assets/app-icons/*.png - App icons (replace with Aurora Prism branding)\n- assets/favicon.png - Browser favicon\n- assets/icon-android-*.png - Android icons\n- assets/default-avatar.png - Default avatar image\n\n## 2. App Metadata\n- app.json - App name, slug, description\n- package.json - App name and description\n\n## 3. Text References (276 occurrences)\n- Onboarding screens (src/screens/Onboarding/)\n- Signup screens (src/screens/Signup/)\n- Settings/About screens\n- Terms of Service / Privacy Policy references\n- Help text and tooltips\n- Error messages mentioning Bluesky\n\n## 4. URLs\n- bsky.app references (feed URLs, profile URLs)\n- bsky.social references\n- Links to Bluesky support/help\n\n## 5. Service Names\n- Bluesky Moderation Service references\n- Default feed generator names\n\nTotal: 276 text references found\")", 23 - "Bash(psql \"$DATABASE_URL\" -c \"SELECT \n (SELECT COUNT(*) FROM users) as users,\n (SELECT COUNT(*) FROM posts) as posts,\n (SELECT COUNT(*) FROM likes) as likes,\n (SELECT COUNT(*) FROM reposts) as reposts,\n (SELECT COUNT(*) FROM follows) as follows,\n (SELECT COUNT(*) FROM blocks) as blocks;\")" 24 ], 25 "deny": [], 26 "ask": []
··· 20 "Bash(npm:*)", 21 "Bash(docker-compose restart:*)", 22 "Bash(echo \"# Bluesky Branding Removal Checklist\n\n## 1. App Icons & Images\n- assets/app-icons/*.png - App icons (replace with Aurora Prism branding)\n- assets/favicon.png - Browser favicon\n- assets/icon-android-*.png - Android icons\n- assets/default-avatar.png - Default avatar image\n\n## 2. App Metadata\n- app.json - App name, slug, description\n- package.json - App name and description\n\n## 3. Text References (276 occurrences)\n- Onboarding screens (src/screens/Onboarding/)\n- Signup screens (src/screens/Signup/)\n- Settings/About screens\n- Terms of Service / Privacy Policy references\n- Help text and tooltips\n- Error messages mentioning Bluesky\n\n## 4. URLs\n- bsky.app references (feed URLs, profile URLs)\n- bsky.social references\n- Links to Bluesky support/help\n\n## 5. Service Names\n- Bluesky Moderation Service references\n- Default feed generator names\n\nTotal: 276 text references found\")", 23 + "Bash(psql \"$DATABASE_URL\" -c \"SELECT \n (SELECT COUNT(*) FROM users) as users,\n (SELECT COUNT(*) FROM posts) as posts,\n (SELECT COUNT(*) FROM likes) as likes,\n (SELECT COUNT(*) FROM reposts) as reposts,\n (SELECT COUNT(*) FROM follows) as follows,\n (SELECT COUNT(*) FROM blocks) as blocks;\")", 24 + "Bash(dig +short TXT _atproto.spacelawshitpost.me)", 25 + "Bash(python3 -m json.tool)" 26 ], 27 "deny": [], 28 "ask": []
+9
.env.example
··· 1 # AppView Configuration 2 # Copy this to .env and customize as needed 3 4 # Constellation Integration (enhanced stats from microcosm.blue) 5 # Set to false to disable and avoid timeout errors 6 CONSTELLATION_ENABLED=false
··· 1 # AppView Configuration 2 # Copy this to .env and customize as needed 3 4 + # Relay/Firehose Configuration 5 + # Primary relay source (usually Bluesky's main relay) 6 + # RELAY_URL=wss://bsky.network/xrpc/com.atproto.sync.subscribeRepos 7 + 8 + # Additional relay sources (comma-separated) 9 + # Use this to crawl independent PDS instances that aren't in Bluesky's relay 10 + # Example: atproto.africa hosts blacksky.app and other independent instances 11 + # ADDITIONAL_RELAY_URLS=wss://atproto.africa/xrpc/com.atproto.sync.subscribeRepos 12 + 13 # Constellation Integration (enhanced stats from microcosm.blue) 14 # Set to false to disable and avoid timeout errors 15 CONSTELLATION_ENABLED=false
+84
ON_DEMAND_PDS_BACKFILL.md
···
··· 1 + # On-Demand PDS Backfill 2 + 3 + This feature allows your AppView to automatically fetch users from independent PDS instances that aren't federated to Bluesky's relay. 4 + 5 + ## How It Works 6 + 7 + ### Automatic Backfill 8 + When a user tries to view a profile that doesn't exist in your AppView: 9 + 1. The system detects the 404 10 + 2. Resolves the user's DID to find their PDS (via plc.directory) 11 + 3. Fetches their profile and recent content directly from their PDS 12 + 4. Indexes it into your AppView 13 + 5. Future requests will return the cached data 14 + 15 + ### Manual Backfill (Admin Panel) 16 + You can also manually trigger backfills via the admin API: 17 + 18 + ```bash 19 + # Trigger backfill for a specific DID 20 + curl -X POST https://your-appview.com/api/admin/backfill/pds \ 21 + -H "Content-Type: application/json" \ 22 + -d '{"did": "did:plc:63hvnyjvqi2nzzcsjgnry5we"}' 23 + 24 + # Check backfill status 25 + curl https://your-appview.com/api/admin/backfill/pds/status 26 + ``` 27 + 28 + ## Example: Backfilling spacelawshitpost.me 29 + 30 + The user `spacelawshitpost.me` is on blacksky.app PDS and isn't in Bluesky's relay. 31 + 32 + ### Automatic Method 33 + Just try to view their profile in your AppView: 34 + ``` 35 + GET /xrpc/app.bsky.actor.getProfile?actor=did:plc:63hvnyjvqi2nzzcsjgnry5we 36 + ``` 37 + 38 + First request: Returns 404 with message "Attempting to fetch from their PDS" 39 + Wait 5-10 seconds, then retry 40 + Second request: Returns full profile! 41 + 42 + ### Manual Method 43 + ```bash 44 + curl -X POST https://appview.dollspace.gay/api/admin/backfill/pds \ 45 + -H "Content-Type: application/json" \ 46 + -d '{"did": "did:plc:63hvnyjvqi2nzzcsjgnry5we"}' 47 + ``` 48 + 49 + ## Rate Limiting 50 + 51 + - **Cooldown**: 5 minutes per DID (won't re-backfill the same user more frequently) 52 + - **Record Limit**: Maximum 1000 records per collection (prevents abuse) 53 + 54 + ## Collections Backfilled 55 + 56 + When a user is backfilled, the system fetches: 57 + - `app.bsky.actor.profile` - Profile info 58 + - `app.bsky.feed.post` - Posts 59 + - `app.bsky.feed.like` - Likes 60 + - `app.bsky.feed.repost` - Reposts 61 + - `app.bsky.graph.follow` - Follows 62 + - `app.bsky.graph.block` - Blocks 63 + - And any other collections the PDS returns 64 + 65 + ## Monitoring 66 + 67 + Check the server logs for backfill progress: 68 + ``` 69 + [ON_DEMAND_BACKFILL] Starting backfill for did:plc:... 70 + [ON_DEMAND_BACKFILL] did:plc:... is on PDS: blacksky.app 71 + [ON_DEMAND_BACKFILL] Backfilling spacelawshitpost.me from blacksky.app 72 + [ON_DEMAND_BACKFILL] Collections: app.bsky.actor.profile, app.bsky.feed.post, ... 73 + [ON_DEMAND_BACKFILL] Backfilled 42 records from app.bsky.feed.post 74 + [ON_DEMAND_BACKFILL] Completed backfill for did:plc:... 75 + ``` 76 + 77 + ## Why Is This Needed? 78 + 79 + Some users are on independent PDS instances that: 80 + 1. Aren't federated to Bluesky's main relay 81 + 2. Require authentication for firehose access 82 + 3. Are on relays that are currently offline (like atproto.africa) 83 + 84 + This on-demand system ensures your AppView can still serve these users when requested, without continuously polling or maintaining permanent connections to every independent PDS.
+51
server/routes.ts
··· 198 `[FIREHOSE] Worker ${workerId}/${totalWorkers} - TypeScript firehose writer (firehose → Redis)` 199 ); 200 firehoseClient.connect(workerId, totalWorkers); 201 } else if (!firehoseEnabled) { 202 console.log( 203 `[FIREHOSE] TypeScript firehose disabled (using Python firehose)` ··· 3421 'TypeScript backfill has been disabled. Please use the Python backfill service instead.', 3422 info: 'Set BACKFILL_DAYS environment variable and run the Python unified worker.', 3423 }); 3424 }); 3425 3426 // XRPC API Endpoints
··· 198 `[FIREHOSE] Worker ${workerId}/${totalWorkers} - TypeScript firehose writer (firehose → Redis)` 199 ); 200 firehoseClient.connect(workerId, totalWorkers); 201 + 202 + // Connect to additional relay sources (blacksky.app, etc.) 203 + const { initializeAdditionalRelays, additionalRelays } = require('./services/firehose'); 204 + initializeAdditionalRelays(); 205 + 206 + if (additionalRelays.length > 0) { 207 + console.log(`[FIREHOSE] Connecting to ${additionalRelays.length} additional relay sources...`); 208 + additionalRelays.forEach((relay: any, index: number) => { 209 + relay.connect(workerId, totalWorkers); 210 + console.log(`[FIREHOSE] Additional relay ${index + 1} connected`); 211 + }); 212 + } 213 } else if (!firehoseEnabled) { 214 console.log( 215 `[FIREHOSE] TypeScript firehose disabled (using Python firehose)` ··· 3433 'TypeScript backfill has been disabled. Please use the Python backfill service instead.', 3434 info: 'Set BACKFILL_DAYS environment variable and run the Python unified worker.', 3435 }); 3436 + }); 3437 + 3438 + // On-demand PDS backfill endpoint 3439 + app.post('/api/admin/backfill/pds', async (req, res) => { 3440 + try { 3441 + const { did } = req.body; 3442 + 3443 + if (!did || !did.startsWith('did:')) { 3444 + res.status(400).json({ error: 'Invalid DID provided' }); 3445 + return; 3446 + } 3447 + 3448 + const { onDemandBackfill } = require('./services/on-demand-backfill'); 3449 + 3450 + // Trigger backfill (non-blocking) 3451 + onDemandBackfill.backfillUser(did).catch((error: any) => { 3452 + console.error(`[API] On-demand backfill failed for ${did}:`, error); 3453 + }); 3454 + 3455 + res.json({ 3456 + success: true, 3457 + message: `Backfill started for ${did}. Check logs for progress.`, 3458 + }); 3459 + } catch (error) { 3460 + console.error('[API] Error triggering on-demand backfill:', error); 3461 + res.status(500).json({ error: 'Failed to trigger backfill' }); 3462 + } 3463 + }); 3464 + 3465 + // Get on-demand backfill status 3466 + app.get('/api/admin/backfill/pds/status', async (_req, res) => { 3467 + try { 3468 + const { onDemandBackfill } = require('./services/on-demand-backfill'); 3469 + const status = onDemandBackfill.getStatus(); 3470 + res.json(status); 3471 + } catch (error) { 3472 + console.error('[API] Error getting backfill status:', error); 3473 + res.status(500).json({ error: 'Failed to get status' }); 3474 + } 3475 }); 3476 3477 // XRPC API Endpoints
+20
server/services/firehose.ts
··· 710 } 711 712 export const firehoseClient = new FirehoseClient();
··· 710 } 711 712 export const firehoseClient = new FirehoseClient(); 713 + 714 + // Additional relay sources for multi-relay support 715 + export const additionalRelays: FirehoseClient[] = []; 716 + 717 + // Initialize additional relay clients from environment variable 718 + export function initializeAdditionalRelays() { 719 + const additionalRelayUrls = process.env.ADDITIONAL_RELAY_URLS?.split(',').map(url => url.trim()).filter(Boolean) || []; 720 + 721 + if (additionalRelayUrls.length > 0) { 722 + console.log(`[FIREHOSE] Initializing ${additionalRelayUrls.length} additional relay sources:`, additionalRelayUrls); 723 + 724 + for (const relayUrl of additionalRelayUrls) { 725 + const client = new FirehoseClient(relayUrl); 726 + additionalRelays.push(client); 727 + console.log(`[FIREHOSE] Additional relay registered: ${relayUrl}`); 728 + } 729 + } 730 + 731 + return additionalRelays; 732 + }
+232
server/services/on-demand-backfill.ts
···
··· 1 + import { logCollector } from './log-collector'; 2 + import { redisQueue } from './redis-queue'; 3 + import { metricsService } from './metrics'; 4 + 5 + interface BackfillJob { 6 + did: string; 7 + pdsUrl: string; 8 + timestamp: number; 9 + } 10 + 11 + export class OnDemandBackfill { 12 + private activeJobs: Map<string, BackfillJob> = new Map(); 13 + private recentlyBackfilled: Set<string> = new Set(); 14 + private readonly BACKFILL_COOLDOWN = 5 * 60 * 1000; // Don't re-backfill same DID for 5 minutes 15 + 16 + /** 17 + * Backfill a user from their PDS when they're not found in our AppView 18 + */ 19 + async backfillUser(did: string): Promise<boolean> { 20 + // Check if we're already backfilling this DID 21 + if (this.activeJobs.has(did)) { 22 + console.log(`[ON_DEMAND_BACKFILL] Already backfilling ${did}`); 23 + return false; 24 + } 25 + 26 + // Check cooldown - don't spam backfill the same user 27 + if (this.recentlyBackfilled.has(did)) { 28 + console.log(`[ON_DEMAND_BACKFILL] ${did} recently backfilled, skipping`); 29 + return false; 30 + } 31 + 32 + try { 33 + console.log(`[ON_DEMAND_BACKFILL] Starting backfill for ${did}`); 34 + logCollector.info(`On-demand backfill started for ${did}`); 35 + 36 + // First, resolve the DID to find their PDS 37 + const pdsUrl = await this.resolvePDS(did); 38 + 39 + if (!pdsUrl) { 40 + console.warn(`[ON_DEMAND_BACKFILL] Could not resolve PDS for ${did}`); 41 + return false; 42 + } 43 + 44 + console.log(`[ON_DEMAND_BACKFILL] ${did} is on PDS: ${pdsUrl}`); 45 + 46 + // Mark as active 47 + this.activeJobs.set(did, { 48 + did, 49 + pdsUrl, 50 + timestamp: Date.now(), 51 + }); 52 + 53 + // Perform the backfill 54 + await this.performBackfill(did, pdsUrl); 55 + 56 + // Mark as recently backfilled (with cooldown) 57 + this.recentlyBackfilled.add(did); 58 + setTimeout(() => { 59 + this.recentlyBackfilled.delete(did); 60 + }, this.BACKFILL_COOLDOWN); 61 + 62 + // Remove from active jobs 63 + this.activeJobs.delete(did); 64 + 65 + console.log(`[ON_DEMAND_BACKFILL] Completed backfill for ${did}`); 66 + logCollector.success(`On-demand backfill completed for ${did}`); 67 + 68 + return true; 69 + } catch (error) { 70 + console.error(`[ON_DEMAND_BACKFILL] Error backfilling ${did}:`, error); 71 + logCollector.error(`On-demand backfill failed for ${did}`, { error }); 72 + this.activeJobs.delete(did); 73 + metricsService.incrementError(); 74 + return false; 75 + } 76 + } 77 + 78 + private async resolvePDS(did: string): Promise<string | null> { 79 + try { 80 + // Fetch DID document from PLC directory 81 + const plcUrl = `https://plc.directory/${did}`; 82 + const response = await fetch(plcUrl); 83 + 84 + if (!response.ok) { 85 + console.warn(`[ON_DEMAND_BACKFILL] Failed to resolve DID ${did}: ${response.status}`); 86 + return null; 87 + } 88 + 89 + const didDoc = await response.json(); 90 + 91 + // Extract PDS service endpoint 92 + const pdsService = didDoc.service?.find( 93 + (s: any) => s.type === 'AtprotoPersonalDataServer' 94 + ); 95 + 96 + if (!pdsService?.serviceEndpoint) { 97 + console.warn(`[ON_DEMAND_BACKFILL] No PDS service found in DID document for ${did}`); 98 + return null; 99 + } 100 + 101 + // Remove https:// prefix to get just the hostname 102 + const pdsUrl = pdsService.serviceEndpoint.replace(/^https?:\/\//, ''); 103 + 104 + return pdsUrl; 105 + } catch (error) { 106 + console.error(`[ON_DEMAND_BACKFILL] Error resolving PDS for ${did}:`, error); 107 + return null; 108 + } 109 + } 110 + 111 + private async performBackfill(did: string, pdsUrl: string) { 112 + try { 113 + // First, get repo description 114 + const describeUrl = `https://${pdsUrl}/xrpc/com.atproto.repo.describeRepo?repo=${did}`; 115 + const describeResponse = await fetch(describeUrl); 116 + 117 + if (!describeResponse.ok) { 118 + throw new Error(`Failed to describe repo: ${describeResponse.status}`); 119 + } 120 + 121 + const describeData = await describeResponse.json(); 122 + const handle = describeData.handle; 123 + const collections = describeData.collections || []; 124 + 125 + console.log(`[ON_DEMAND_BACKFILL] Backfilling ${handle} (${did}) from ${pdsUrl}`); 126 + console.log(`[ON_DEMAND_BACKFILL] Collections: ${collections.join(', ')}`); 127 + 128 + // Process handle/identity first 129 + await redisQueue.push({ 130 + type: 'identity', 131 + data: { 132 + did: did, 133 + handle: handle, 134 + }, 135 + }); 136 + 137 + metricsService.incrementEvent('#identity'); 138 + 139 + // Backfill each collection 140 + for (const collection of collections) { 141 + await this.backfillCollection(did, pdsUrl, collection); 142 + } 143 + 144 + console.log(`[ON_DEMAND_BACKFILL] Backfilled all collections for ${did}`); 145 + } catch (error) { 146 + console.error(`[ON_DEMAND_BACKFILL] Error during backfill:`, error); 147 + throw error; 148 + } 149 + } 150 + 151 + private async backfillCollection(did: string, pdsUrl: string, collection: string) { 152 + try { 153 + let cursor: string | undefined = undefined; 154 + let totalRecords = 0; 155 + 156 + do { 157 + // Fetch records from this collection 158 + const listUrl = `https://${pdsUrl}/xrpc/com.atproto.repo.listRecords?repo=${did}&collection=${collection}&limit=100${cursor ? `&cursor=${cursor}` : ''}`; 159 + const response = await fetch(listUrl); 160 + 161 + if (!response.ok) { 162 + if (response.status === 400) { 163 + // Collection doesn't exist, skip it 164 + return; 165 + } 166 + throw new Error(`Failed to list ${collection}: ${response.status}`); 167 + } 168 + 169 + const data = await response.json(); 170 + const records = data.records || []; 171 + 172 + if (records.length === 0) { 173 + break; 174 + } 175 + 176 + // Process each record 177 + for (const record of records) { 178 + const path = record.uri.split('/').slice(-2).join('/'); // collection/rkey 179 + 180 + const commit = { 181 + repo: did, 182 + ops: [ 183 + { 184 + action: 'create', 185 + path: path, 186 + cid: record.cid, 187 + record: record.value, 188 + }, 189 + ], 190 + }; 191 + 192 + // Push to Redis queue for processing 193 + await redisQueue.push({ 194 + type: 'commit', 195 + data: commit, 196 + seq: undefined, 197 + }); 198 + 199 + metricsService.incrementEvent('#commit'); 200 + totalRecords++; 201 + } 202 + 203 + cursor = data.cursor; 204 + 205 + // Don't backfill more than 1000 records per collection (prevent abuse) 206 + if (totalRecords >= 1000) { 207 + console.log(`[ON_DEMAND_BACKFILL] Reached limit of 1000 records for ${collection}, stopping`); 208 + break; 209 + } 210 + 211 + } while (cursor); 212 + 213 + if (totalRecords > 0) { 214 + console.log(`[ON_DEMAND_BACKFILL] Backfilled ${totalRecords} records from ${collection}`); 215 + } 216 + 217 + } catch (error) { 218 + console.error(`[ON_DEMAND_BACKFILL] Error backfilling collection ${collection}:`, error); 219 + throw error; 220 + } 221 + } 222 + 223 + getStatus() { 224 + return { 225 + activeJobs: Array.from(this.activeJobs.values()), 226 + recentlyBackfilled: this.recentlyBackfilled.size, 227 + cooldownMs: this.BACKFILL_COOLDOWN, 228 + }; 229 + } 230 + } 231 + 232 + export const onDemandBackfill = new OnDemandBackfill();
+22
server/services/xrpc/services/actor-service.ts
··· 17 suggestedUsersUnspeccedSchema, 18 } from '../schemas'; 19 import { xrpcApi } from '../../xrpc-api'; 20 21 /** 22 * Get a single actor profile ··· 30 const profiles = await (xrpcApi as any)._getProfiles([params.actor], req); 31 32 if (profiles.length === 0) { 33 res.status(404).json({ error: 'Profile not found' }); 34 return; 35 }
··· 17 suggestedUsersUnspeccedSchema, 18 } from '../schemas'; 19 import { xrpcApi } from '../../xrpc-api'; 20 + import { onDemandBackfill } from '../../on-demand-backfill'; 21 22 /** 23 * Get a single actor profile ··· 31 const profiles = await (xrpcApi as any)._getProfiles([params.actor], req); 32 33 if (profiles.length === 0) { 34 + // Profile not found - trigger on-demand backfill from their PDS 35 + const actor = params.actor; 36 + 37 + // Check if it's a DID (not a handle) 38 + if (actor.startsWith('did:')) { 39 + console.log(`[ON_DEMAND] Profile ${actor} not found, triggering backfill...`); 40 + 41 + // Trigger backfill (non-blocking - don't wait for it) 42 + onDemandBackfill.backfillUser(actor).catch(error => { 43 + console.error(`[ON_DEMAND] Backfill failed for ${actor}:`, error); 44 + }); 45 + 46 + // Return 404 with a helpful message 47 + res.status(404).json({ 48 + error: 'ProfileNotFound', 49 + message: 'Profile not found. Attempting to fetch from their PDS - try again in a few seconds.', 50 + }); 51 + return; 52 + } 53 + 54 + // For handles, we'd need to resolve to DID first 55 res.status(404).json({ error: 'Profile not found' }); 56 return; 57 }