Sifa professional network API (Fastify, AT Protocol, Jetstream) sifa.id/

fix(suggestions): only persist profiles for actual Sifa users (#119)

The profile resolver was inserting profile rows for ALL Bluesky
follows (5000+), polluting the profiles table and inflating user
counts on the admin page.

The resolver now persists profiles only for DIDs that have sessions
(actual Sifa users). For "Not on Sifa" suggestion cards, it resolves
Bluesky profile data on the fly without persisting it.

authored by

Guido X Jansen and committed by
GitHub
03daf5f9 6777004d

+92 -45
+39 -23
src/routes/suggestions.ts
··· 8 8 import { createAuthMiddleware, getAuthContext } from '../middleware/auth.js'; 9 9 import { fetchBlueskyFollowsFromPds, importBlueskyFollows } from '../services/bluesky-follows.js'; 10 10 import { fetchTangledFollowsFromPds, importTangledFollows } from '../services/tangled-follows.js'; 11 - import { resolveAndUpsertProfiles } from '../services/profile-resolver.js'; 11 + import { 12 + resolveAndUpsertProfiles, 13 + fetchProfilesFromBluesky, 14 + } from '../services/profile-resolver.js'; 12 15 13 16 const dismissSchema = z.object({ 14 17 subjectDid: z.string().startsWith('did:'), ··· 129 132 const hasMore = notOnSifaRows.length > limit; 130 133 const notOnSifaItems = hasMore ? notOnSifaRows.slice(0, limit) : notOnSifaRows; 131 134 132 - const notOnSifa = notOnSifaItems.map((i) => ({ 133 - did: i.subjectDid, 134 - handle: i.handle ?? '', 135 - displayName: i.displayName ?? undefined, 136 - avatarUrl: i.avatarUrl ?? undefined, 137 - source: i.source, 138 - dismissed: dismissedSet.has(i.subjectDid), 139 - })); 135 + // Enrich "Not on Sifa" results with Bluesky profile data (without persisting) 136 + const didsNeedingProfiles = notOnSifaItems.filter((i) => !i.handle).map((i) => i.subjectDid); 137 + const enriched = 138 + didsNeedingProfiles.length > 0 139 + ? await fetchProfilesFromBluesky(didsNeedingProfiles, app.log) 140 + : []; 141 + const enrichedMap = new Map(enriched.map((p) => [p.did, p])); 142 + 143 + const notOnSifa = notOnSifaItems.map((i) => { 144 + const bsky = enrichedMap.get(i.subjectDid); 145 + return { 146 + did: i.subjectDid, 147 + handle: i.handle || bsky?.handle || '', 148 + displayName: i.displayName ?? bsky?.displayName, 149 + avatarUrl: i.avatarUrl ?? 
bsky?.avatarUrl, 150 + source: i.source, 151 + dismissed: dismissedSet.has(i.subjectDid), 152 + }; 153 + }); 140 154 141 155 return reply.send({ 142 156 onSifa, ··· 253 267 app.log.debug({ err, did }, 'Tangled follow sync skipped or failed'); 254 268 } 255 269 256 - // Resolve profiles for imported DIDs that don't have profile data yet 257 - const allDids = [ 258 - ...(blueskyCount > 0 259 - ? ( 260 - await db 261 - .select({ subjectDid: connections.subjectDid }) 262 - .from(connections) 263 - .where(and(eq(connections.followerDid, did), ne(connections.source, 'sifa'))) 264 - ).map((r) => r.subjectDid) 265 - : []), 266 - ]; 267 - if (allDids.length > 0) { 270 + // Only resolve profiles for DIDs that are actual Sifa users (have sessions) 271 + // Do NOT insert profiles for random Bluesky follows — that pollutes the DB. 272 + const claimedFollowDids = await db 273 + .select({ subjectDid: connections.subjectDid }) 274 + .from(connections) 275 + .where( 276 + and( 277 + eq(connections.followerDid, did), 278 + ne(connections.source, 'sifa'), 279 + sql`${connections.subjectDid} IN (SELECT DISTINCT did FROM sessions)`, 280 + ), 281 + ); 282 + const claimedDids = claimedFollowDids.map((r) => r.subjectDid); 283 + if (claimedDids.length > 0) { 268 284 try { 269 - const resolved = await resolveAndUpsertProfiles(db, allDids, app.log); 270 - app.log.info({ did, resolved }, 'Resolved profiles for suggestions'); 285 + const resolved = await resolveAndUpsertProfiles(db, claimedDids, app.log); 286 + app.log.info({ did, resolved }, 'Resolved profiles for On Sifa suggestions'); 271 287 } catch (err) { 272 288 app.log.error({ err, did }, 'Profile resolution for suggestions failed'); 273 289 }
+53 -22
src/services/profile-resolver.ts
··· 6 6 const PUBLIC_API = 'https://public.api.bsky.app'; 7 7 const BATCH_SIZE = 25; 8 8 9 + interface ResolvedProfile { 10 + did: string; 11 + handle: string; 12 + displayName?: string; 13 + avatarUrl?: string; 14 + } 15 + 9 16 /** 10 17 * Resolves profiles from the public Bluesky API for DIDs that don't 11 18 * already have profile data in the local database. 12 - * Returns the number of profiles resolved. 19 + * Only inserts profiles for DIDs that are actual Sifa users (have sessions). 13 20 */ 14 21 export async function resolveAndUpsertProfiles( 15 22 db: Database, ··· 25 32 const missing = dids.filter((d) => !existingDids.has(d)); 26 33 if (missing.length === 0) return 0; 27 34 35 + const resolved = await fetchProfilesFromBluesky(missing, logger); 36 + let upserted = 0; 37 + const now = new Date(); 38 + 39 + for (const profile of resolved) { 40 + try { 41 + await db 42 + .insert(profiles) 43 + .values({ 44 + did: profile.did, 45 + handle: profile.handle, 46 + displayName: profile.displayName ?? null, 47 + avatarUrl: profile.avatarUrl ?? null, 48 + createdAt: now, 49 + }) 50 + .onConflictDoNothing(); 51 + upserted++; 52 + } catch (err) { 53 + logger.debug({ err, did: profile.did }, 'Profile upsert failed'); 54 + } 55 + } 56 + 57 + return upserted; 58 + } 59 + 60 + /** 61 + * Fetches profile data from the public Bluesky API without persisting. 62 + * Used to display names/avatars for "Not on Sifa" suggestion cards. 
63 + */ 64 + export async function fetchProfilesFromBluesky( 65 + dids: string[], 66 + logger: FastifyBaseLogger, 67 + ): Promise<ResolvedProfile[]> { 68 + if (dids.length === 0) return []; 69 + 28 70 const publicAgent = new Agent(PUBLIC_API); 29 - let resolved = 0; 30 - const now = new Date(); 71 + const results: ResolvedProfile[] = []; 31 72 32 - // Resolve in batches using getProfiles (max 25 per request) 33 - for (let i = 0; i < missing.length; i += BATCH_SIZE) { 34 - const batch = missing.slice(i, i + BATCH_SIZE); 73 + for (let i = 0; i < dids.length; i += BATCH_SIZE) { 74 + const batch = dids.slice(i, i + BATCH_SIZE); 35 75 try { 36 76 const res = await publicAgent.getProfiles( 37 77 { actors: batch }, 38 78 { signal: AbortSignal.timeout(10000) }, 39 79 ); 40 80 for (const profile of res.data.profiles) { 41 - try { 42 - await db 43 - .insert(profiles) 44 - .values({ 45 - did: profile.did, 46 - handle: profile.handle, 47 - displayName: profile.displayName ?? null, 48 - avatarUrl: profile.avatar ?? null, 49 - createdAt: now, 50 - }) 51 - .onConflictDoNothing(); 52 - resolved++; 53 - } catch (err) { 54 - logger.debug({ err, did: profile.did }, 'Profile upsert failed'); 55 - } 81 + results.push({ 82 + did: profile.did, 83 + handle: profile.handle, 84 + displayName: profile.displayName, 85 + avatarUrl: profile.avatar, 86 + }); 56 87 } 57 88 } catch (err) { 58 89 logger.warn({ err, batchStart: i, batchSize: batch.length }, 'Profile batch resolve failed'); 59 90 } 60 91 } 61 92 62 - return resolved; 93 + return results; 63 94 }