Sifa professional network API (Fastify, AT Protocol, Jetstream) sifa.id/

feat: skills normalization pipeline, typeahead API, and skill-position links (#64)

* feat(jetstream): index skill-position links from position records

* feat(routes): add skills typeahead search API

Add GET /api/skills/search endpoint with pg_trgm similarity matching
on canonical_name and aliases. Returns skills ordered by similarity
score then user_count. Includes canonical_skills schema, migration,
and integration tests.

* feat: add skill normalization service, updated indexer, and complete migration

Adds skill-normalization.ts with normalizeSkillName, createSlug, and
resolveSkill pipeline. Updates skill indexer to resolve against canonical
registry and manage user_count. Fixes migration to include all tables
(pg_trgm, unresolved_skills, skill_position_links, canonical_skill_id).
Adds canonicalSkillId column to skills schema. Part of #61.

* feat(routes): include skill-position links in profile response

Skills now include positionRkeys showing which roles used them.
Positions now include skillRkeys showing which skills were used.
Part of #61.

* fix(tests): replace non-null assertions with optional chaining in normalization tests

* style: fix prettier formatting

authored by

Guido X Jansen and committed by
GitHub
d06eedc6 ea2dd8e8

+986 -10
+36
drizzle/0012_canonical_skills.sql
-- Enable pg_trgm for fuzzy matching in typeahead
CREATE EXTENSION IF NOT EXISTS pg_trgm;

-- Registry of canonical skills that user-entered skill names are resolved against.
-- "canonical_name" is UNIQUE so the table matches the Drizzle schema declaration
-- (canonicalName: text('canonical_name').notNull().unique()); without it the
-- migrated database and the declared schema drift apart.
CREATE TABLE IF NOT EXISTS "canonical_skills" (
	"id" uuid PRIMARY KEY DEFAULT gen_random_uuid() NOT NULL,
	"canonical_name" text NOT NULL UNIQUE,
	"slug" text NOT NULL UNIQUE,
	"category" text,
	"subcategory" text,
	"aliases" text[] NOT NULL DEFAULT '{}',
	"wikidata_id" text,
	"user_count" integer NOT NULL DEFAULT 0
);

-- NOTE(review): the UNIQUE constraint on "slug" already creates an index, so
-- this explicit index looks redundant -- confirm before removing it.
CREATE INDEX IF NOT EXISTS "canonical_skills_slug_idx" ON "canonical_skills" ("slug");
-- Trigram index backing similarity / ILIKE matching on canonical_name.
CREATE INDEX IF NOT EXISTS "canonical_skills_trgm_idx" ON "canonical_skills" USING gin ("canonical_name" gin_trgm_ops);

-- Queue of skill names that did not match any canonical skill.
-- One row per normalized name; "occurrences" counts how often it was seen.
CREATE TABLE IF NOT EXISTS "unresolved_skills" (
	"id" uuid PRIMARY KEY DEFAULT gen_random_uuid() NOT NULL,
	"raw_name" text NOT NULL,
	"normalized_name" text NOT NULL UNIQUE,
	"occurrences" integer NOT NULL DEFAULT 1,
	"first_seen_at" timestamp with time zone NOT NULL DEFAULT now(),
	"resolved_at" timestamp with time zone,
	"resolved_to_id" uuid REFERENCES "canonical_skills"("id")
);

-- Many-to-many links between a profile's positions and skills, keyed by record rkeys.
-- Rows are removed automatically when the owning profile is deleted.
CREATE TABLE IF NOT EXISTS "skill_position_links" (
	"did" text NOT NULL REFERENCES "profiles"("did") ON DELETE CASCADE,
	"position_rkey" text NOT NULL,
	"skill_rkey" text NOT NULL,
	"indexed_at" timestamp with time zone NOT NULL DEFAULT now(),
	PRIMARY KEY ("did", "position_rkey", "skill_rkey")
);

-- Nullable link from an indexed skill to the canonical skill it resolved to.
ALTER TABLE "skills" ADD COLUMN IF NOT EXISTS "canonical_skill_id" uuid REFERENCES "canonical_skills"("id");
+7
drizzle/meta/_journal.json
··· 85 85 "when": 1773712800000, 86 86 "tag": "0011_add_country_code", 87 87 "breakpoints": true 88 + }, 89 + { 90 + "idx": 12, 91 + "version": "7", 92 + "when": 1773799200000, 93 + "tag": "0012_canonical_skills", 94 + "breakpoints": true 88 95 } 89 96 ] 90 97 }
+12
src/db/schema/canonical-skills.ts
··· 1 + import { pgTable, text, integer, uuid } from 'drizzle-orm/pg-core'; 2 + 3 + export const canonicalSkills = pgTable('canonical_skills', { 4 + id: uuid('id').primaryKey().defaultRandom(), 5 + canonicalName: text('canonical_name').notNull().unique(), 6 + slug: text('slug').notNull().unique(), 7 + category: text('category'), 8 + subcategory: text('subcategory'), 9 + aliases: text('aliases').array().notNull().default([]), 10 + wikidataId: text('wikidata_id'), 11 + userCount: integer('user_count').notNull().default(0), 12 + });
+3
src/db/schema/index.ts
··· 16 16 export { languages } from './languages.js'; 17 17 export { suggestionDismissals } from './suggestion-dismissals.js'; 18 18 export { invites } from './invites.js'; 19 + export { canonicalSkills } from './canonical-skills.js'; 20 + export { unresolvedSkills } from './unresolved-skills.js'; 21 + export { skillPositionLinks } from './skill-position-links.js';
+15
src/db/schema/skill-position-links.ts
··· 1 + import { pgTable, text, timestamp, primaryKey } from 'drizzle-orm/pg-core'; 2 + import { profiles } from './profiles.js'; 3 + 4 + export const skillPositionLinks = pgTable( 5 + 'skill_position_links', 6 + { 7 + did: text('did') 8 + .notNull() 9 + .references(() => profiles.did, { onDelete: 'cascade' }), 10 + positionRkey: text('position_rkey').notNull(), 11 + skillRkey: text('skill_rkey').notNull(), 12 + indexedAt: timestamp('indexed_at', { withTimezone: true }).notNull().defaultNow(), 13 + }, 14 + (table) => [primaryKey({ columns: [table.did, table.positionRkey, table.skillRkey] })], 15 + );
+3 -1
src/db/schema/skills.ts
··· 1 - import { pgTable, text, timestamp, primaryKey } from 'drizzle-orm/pg-core'; 1 + import { pgTable, text, timestamp, primaryKey, uuid } from 'drizzle-orm/pg-core'; 2 2 import { profiles } from './profiles.js'; 3 + import { canonicalSkills } from './canonical-skills.js'; 3 4 4 5 export const skills = pgTable( 5 6 'skills', ··· 10 11 rkey: text('rkey').notNull(), 11 12 skillName: text('skill_name').notNull(), 12 13 category: text('category'), 14 + canonicalSkillId: uuid('canonical_skill_id').references(() => canonicalSkills.id), 13 15 createdAt: timestamp('created_at', { withTimezone: true }).notNull(), 14 16 indexedAt: timestamp('indexed_at', { withTimezone: true }).notNull().defaultNow(), 15 17 },
+12
src/db/schema/unresolved-skills.ts
··· 1 + import { pgTable, uuid, text, integer, timestamp } from 'drizzle-orm/pg-core'; 2 + import { canonicalSkills } from './canonical-skills.js'; 3 + 4 + export const unresolvedSkills = pgTable('unresolved_skills', { 5 + id: uuid('id').primaryKey().defaultRandom(), 6 + rawName: text('raw_name').notNull(), 7 + normalizedName: text('normalized_name').notNull().unique(), 8 + occurrences: integer('occurrences').notNull().default(1), 9 + firstSeenAt: timestamp('first_seen_at', { withTimezone: true }).notNull().defaultNow(), 10 + resolvedAt: timestamp('resolved_at', { withTimezone: true }), 11 + resolvedToId: uuid('resolved_to_id').references(() => canonicalSkills.id), 12 + });
+42 -2
src/jetstream/indexers/position.ts
··· 1 1 import type { Database } from '../../db/index.js'; 2 - import { positions } from '../../db/schema/index.js'; 2 + import { positions, skillPositionLinks } from '../../db/schema/index.js'; 3 3 import { and, eq } from 'drizzle-orm'; 4 4 import type { JetstreamEvent } from '../types.js'; 5 5 import { logger } from '../../logger.js'; 6 6 import { sanitize, sanitizeOptional } from '../../lib/sanitize.js'; 7 7 8 + interface StrongRef { 9 + uri: string; 10 + cid: string; 11 + } 12 + 13 + /** Extract rkey from an AT Protocol URI: at://did/collection/rkey */ 14 + function parseRkeyFromUri(uri: string): string | null { 15 + const parts = uri.split('/'); 16 + return parts.length >= 5 ? (parts[4] ?? null) : null; 17 + } 18 + 8 19 interface RecordLocation { 9 20 country?: string; 10 21 region?: string; ··· 20 31 const { operation, rkey, record } = commit; 21 32 22 33 if (operation === 'delete') { 34 + await db 35 + .delete(skillPositionLinks) 36 + .where(and(eq(skillPositionLinks.did, did), eq(skillPositionLinks.positionRkey, rkey))); 23 37 await db.delete(positions).where(and(eq(positions.did, did), eq(positions.rkey, rkey))); 24 38 logger.info({ did, rkey }, 'Deleted position'); 25 39 return; ··· 70 84 }, 71 85 }); 72 86 73 - logger.info({ did, rkey, operation }, 'Indexed position'); 87 + // Sync skill-position links: delete-and-replace strategy 88 + await db 89 + .delete(skillPositionLinks) 90 + .where(and(eq(skillPositionLinks.did, did), eq(skillPositionLinks.positionRkey, rkey))); 91 + 92 + const skillRefs = record.skills as StrongRef[] | undefined; 93 + if (skillRefs && Array.isArray(skillRefs) && skillRefs.length > 0) { 94 + const linkValues = skillRefs 95 + .map((ref) => { 96 + const skillRkey = parseRkeyFromUri(ref.uri); 97 + if (!skillRkey) { 98 + logger.warn( 99 + { did, rkey, uri: ref.uri }, 100 + 'Could not parse skill rkey from strongRef URI', 101 + ); 102 + return null; 103 + } 104 + return { did, positionRkey: rkey, skillRkey }; 105 + }) 106 + 
.filter((v): v is NonNullable<typeof v> => v !== null); 107 + 108 + if (linkValues.length > 0) { 109 + await db.insert(skillPositionLinks).values(linkValues).onConflictDoNothing(); 110 + } 111 + } 112 + 113 + logger.info({ did, rkey, operation, skillLinks: skillRefs?.length ?? 0 }, 'Indexed position'); 74 114 }; 75 115 }
+55 -7
src/jetstream/indexers/skill.ts
··· 1 1 import type { Database } from '../../db/index.js'; 2 - import { skills } from '../../db/schema/index.js'; 3 - import { and, eq } from 'drizzle-orm'; 2 + import { skills, canonicalSkills } from '../../db/schema/index.js'; 3 + import { and, eq, sql } from 'drizzle-orm'; 4 4 import type { JetstreamEvent } from '../types.js'; 5 5 import { logger } from '../../logger.js'; 6 6 import { sanitize, sanitizeOptional } from '../../lib/sanitize.js'; 7 + import { resolveSkill } from '../../services/skill-normalization.js'; 7 8 8 9 export function createSkillIndexer(db: Database) { 9 10 return async (event: JetstreamEvent) => { ··· 13 14 const { operation, rkey, record } = commit; 14 15 15 16 if (operation === 'delete') { 17 + // Look up canonical_skill_id before deleting so we can decrement user_count 18 + const existing = await db 19 + .select({ canonicalSkillId: skills.canonicalSkillId }) 20 + .from(skills) 21 + .where(and(eq(skills.did, did), eq(skills.rkey, rkey))) 22 + .limit(1); 23 + 16 24 await db.delete(skills).where(and(eq(skills.did, did), eq(skills.rkey, rkey))); 25 + 26 + if (existing[0]?.canonicalSkillId) { 27 + await db 28 + .update(canonicalSkills) 29 + .set({ userCount: sql`GREATEST(${canonicalSkills.userCount} - 1, 0)` }) 30 + .where(eq(canonicalSkills.id, existing[0].canonicalSkillId)); 31 + } 32 + 17 33 logger.info({ did, rkey }, 'Deleted skill'); 18 34 return; 19 35 } 20 36 21 37 if (!record) return; 22 38 39 + const skillName = sanitize(record.skillName as string); 40 + const category = sanitizeOptional(record.category as string | undefined) ?? null; 41 + 42 + // Run normalization pipeline 43 + const canonical = await resolveSkill(db, skillName); 44 + const canonicalSkillId = canonical?.id ?? 
null; 45 + 46 + // Check if this is an update (existing record may already be linked to a different canonical) 47 + const existing = await db 48 + .select({ canonicalSkillId: skills.canonicalSkillId }) 49 + .from(skills) 50 + .where(and(eq(skills.did, did), eq(skills.rkey, rkey))) 51 + .limit(1); 52 + 53 + const previousCanonicalId = existing[0]?.canonicalSkillId ?? null; 54 + 23 55 await db 24 56 .insert(skills) 25 57 .values({ 26 58 did, 27 59 rkey, 28 - skillName: sanitize(record.skillName as string), 29 - category: sanitizeOptional(record.category as string | undefined) ?? null, 60 + skillName, 61 + category, 62 + canonicalSkillId, 30 63 createdAt: new Date(record.createdAt as string), 31 64 indexedAt: new Date(), 32 65 }) 33 66 .onConflictDoUpdate({ 34 67 target: [skills.did, skills.rkey], 35 68 set: { 36 - skillName: sanitize(record.skillName as string), 37 - category: sanitizeOptional(record.category as string | undefined) ?? null, 69 + skillName, 70 + category, 71 + canonicalSkillId, 38 72 indexedAt: new Date(), 39 73 }, 40 74 }); 41 75 42 - logger.info({ did, rkey, operation }, 'Indexed skill'); 76 + // Update user_count: decrement old canonical (if changed), increment new 77 + if (previousCanonicalId && previousCanonicalId !== canonicalSkillId) { 78 + await db 79 + .update(canonicalSkills) 80 + .set({ userCount: sql`GREATEST(${canonicalSkills.userCount} - 1, 0)` }) 81 + .where(eq(canonicalSkills.id, previousCanonicalId)); 82 + } 83 + if (canonicalSkillId && canonicalSkillId !== previousCanonicalId) { 84 + await db 85 + .update(canonicalSkills) 86 + .set({ userCount: sql`${canonicalSkills.userCount} + 1` }) 87 + .where(eq(canonicalSkills.id, canonicalSkillId)); 88 + } 89 + 90 + logger.info({ did, rkey, operation, canonicalSkillId }, 'Indexed skill'); 43 91 }; 44 92 }
+18
src/routes/profile.ts
··· 18 18 externalAccounts, 19 19 externalAccountVerifications, 20 20 invites, 21 + skillPositionLinks, 21 22 } from '../db/schema/index.js'; 22 23 import { resolveSessionDid } from '../middleware/auth.js'; 23 24 import { isVerifiablePlatform } from '../services/verification.js'; ··· 146 147 profileLanguages, 147 148 profileExternalAccounts, 148 149 verifications, 150 + profileSkillPositionLinks, 149 151 ] = await Promise.all([ 150 152 db.select().from(positions).where(eq(positions.did, profile.did)), 151 153 db.select().from(education).where(eq(education.did, profile.did)), ··· 162 164 .select() 163 165 .from(externalAccountVerifications) 164 166 .where(eq(externalAccountVerifications.did, profile.did)), 167 + db.select().from(skillPositionLinks).where(eq(skillPositionLinks.did, profile.did)), 165 168 ]); 166 169 167 170 const resolved = resolveProfileFields( ··· 204 207 ].filter(Boolean); 205 208 const location = locationParts.length > 0 ? locationParts.join(', ') : null; 206 209 210 + // Build skill-position link lookup maps 211 + const linksBySkill = new Map<string, string[]>(); 212 + const linksByPosition = new Map<string, string[]>(); 213 + for (const link of profileSkillPositionLinks) { 214 + const skillList = linksBySkill.get(link.skillRkey) ?? []; 215 + skillList.push(link.positionRkey); 216 + linksBySkill.set(link.skillRkey, skillList); 217 + 218 + const posList = linksByPosition.get(link.positionRkey) ?? []; 219 + posList.push(link.skillRkey); 220 + linksByPosition.set(link.positionRkey, posList); 221 + } 222 + 207 223 // Find primary external account for website display 208 224 const [primaryAccount] = await db 209 225 .select() ··· 250 266 startDate: p.startDate, 251 267 endDate: p.endDate, 252 268 current: p.current, 269 + skillRkeys: linksByPosition.get(p.rkey) ?? 
[], 253 270 })), 254 271 education: profileEducation.map((e) => ({ 255 272 rkey: e.rkey, ··· 265 282 rkey: s.rkey, 266 283 skillName: s.skillName, 267 284 category: s.category, 285 + positionRkeys: linksBySkill.get(s.rkey) ?? [], 268 286 })), 269 287 certifications: profileCertifications.map((c) => ({ 270 288 rkey: c.rkey,
+50
src/routes/skills.ts
import type { FastifyInstance } from 'fastify';
import type { Database } from '../db/index.js';
import { sql } from 'drizzle-orm';

/** Page size used when `limit` is missing, non-numeric, or below 1. */
const DEFAULT_LIMIT = 20;
/** Upper bound on the number of skills returned per request. */
const MAX_LIMIT = 100;

/**
 * Parse the `limit` query parameter into a value in [1, MAX_LIMIT].
 * Anything that is not a positive integer string falls back to DEFAULT_LIMIT;
 * in particular negative values must never reach the query, because
 * Postgres rejects a negative LIMIT and the request would fail with a 500.
 */
function parseLimit(raw: unknown): number {
  const parsed = typeof raw === 'string' ? Number.parseInt(raw, 10) : Number.NaN;
  if (!Number.isFinite(parsed) || parsed < 1) {
    return DEFAULT_LIMIT;
  }
  return Math.min(parsed, MAX_LIMIT);
}

/**
 * Escape LIKE/ILIKE metacharacters (`\`, `%`, `_`) so user input is matched
 * literally. Backslash is the default LIKE escape character in Postgres.
 */
function escapeLikePattern(term: string): string {
  return term.replace(/[\\%_]/g, '\\$&');
}

/**
 * Register the skills typeahead endpoint.
 *
 * GET /api/skills/search?q=<term>&limit=<n>
 * - 400 when `q` is missing, blank, or not a single string value.
 * - Otherwise returns `{ skills: [...] }` ordered by trigram similarity
 *   (best of canonical_name and aliases), then by user_count.
 */
export function registerSkillsRoutes(app: FastifyInstance, db: Database) {
  app.get('/api/skills/search', async (request, reply) => {
    const query = request.query as Record<string, unknown>;

    // A repeated parameter (?q=a&q=b) is parsed into an array; treat anything
    // that is not a single non-blank string as an invalid request.
    const q = typeof query.q === 'string' ? query.q.trim() : '';
    if (!q) {
      return reply
        .status(400)
        .send({ error: 'InvalidRequest', message: 'Query parameter q is required' });
    }

    const limitNum = parseLimit(query.limit);
    const searchTerm = q.toLowerCase();
    const likePattern = `%${escapeLikePattern(searchTerm)}%`;

    // Search canonical_name with pg_trgm similarity + alias array matching
    // Order by similarity score first, then user_count for tie-breaking
    const results = await db.execute(sql`
      SELECT id, canonical_name, slug, category, user_count,
        GREATEST(
          similarity(lower(canonical_name), ${searchTerm}),
          (SELECT COALESCE(MAX(similarity(alias, ${searchTerm})), 0)
           FROM unnest(aliases) AS alias)
        ) AS sim
      FROM canonical_skills
      WHERE lower(canonical_name) % ${searchTerm}
        OR canonical_name ILIKE ${likePattern}
        OR EXISTS (
          SELECT 1 FROM unnest(aliases) AS alias
          WHERE alias % ${searchTerm} OR alias ILIKE ${likePattern}
        )
      ORDER BY sim DESC, user_count DESC
      LIMIT ${limitNum}
    `);

    return {
      skills: results.rows.map((row) => {
        const r = row as Record<string, unknown>;
        return {
          id: r.id,
          canonicalName: r.canonical_name,
          slug: r.slug,
          category: r.category,
          userCount: r.user_count,
        };
      }),
    };
  });
}
+2
src/server.ts
··· 18 18 import { registerImportRoutes } from './routes/import.js'; 19 19 import { registerFollowRoutes } from './routes/follow.js'; 20 20 import { registerSearchRoutes } from './routes/search.js'; 21 + import { registerSkillsRoutes } from './routes/skills.js'; 21 22 import { registerExternalAccountRoutes } from './routes/external-accounts.js'; 22 23 import { registerSuggestionRoutes } from './routes/suggestions.js'; 23 24 import { registerWellKnownRoutes } from './routes/well-known.js'; ··· 97 98 registerImportRoutes(app, db, oauthClient); 98 99 registerFollowRoutes(app, db, oauthClient); 99 100 registerSearchRoutes(app, db); 101 + registerSkillsRoutes(app, db); 100 102 registerExternalAccountRoutes(app, db, oauthClient, valkey); 101 103 registerSuggestionRoutes(app, db, oauthClient, config.PUBLIC_URL); 102 104
+71
src/services/skill-normalization.ts
import type { Database } from '../db/index.js';
import { canonicalSkills, unresolvedSkills } from '../db/schema/index.js';
import { eq, sql } from 'drizzle-orm';
import { logger } from '../logger.js';

/** Normalize a skill name for matching: lowercase, trim, collapse whitespace */
export function normalizeSkillName(name: string): string {
  return name.toLowerCase().trim().replace(/\s+/g, ' ');
}

/**
 * Create a URL-safe slug from a skill name.
 *
 * "c++", "c#" and ".net" are spelled out before every other run of
 * non-alphanumeric characters is collapsed into a single hyphen; leading and
 * trailing hyphens are stripped. A name with no ASCII letters or digits
 * therefore yields an empty string.
 *
 * NOTE(review): ".net" is replaced without a separator, so "ASP.NET" becomes
 * "aspdot-net" -- confirm this matches the slugs stored in canonical_skills.
 */
export function createSlug(name: string): string {
  return name
    .toLowerCase()
    .trim()
    .replace(/c\+\+/gi, 'c-plus-plus')
    .replace(/c#/gi, 'c-sharp')
    .replace(/\.net/gi, 'dot-net')
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/-+/g, '-')
    .replace(/^-|-$/g, '');
}

/**
 * Resolve a user-entered skill name to a canonical skill.
 * Pipeline: normalize -> check slug match -> check aliases -> queue as unresolved.
 *
 * @param db - Database handle used for the lookups and the unresolved-queue upsert.
 * @param rawName - Skill name as entered by the user.
 * @returns The canonical skill row if matched, null if unresolved. An empty or
 *   whitespace-only name returns null without touching the database, so blank
 *   input never creates an unresolved_skills row with an empty normalized name.
 */
export async function resolveSkill(
  db: Database,
  rawName: string,
): Promise<typeof canonicalSkills.$inferSelect | null> {
  const normalized = normalizeSkillName(rawName);
  if (normalized === '') {
    return null;
  }

  // 1. Exact match on slug. Skipped when the slug is empty (symbol-only names),
  //    since an empty slug cannot identify a skill.
  const slug = createSlug(rawName);
  if (slug !== '') {
    const bySlug = await db
      .select()
      .from(canonicalSkills)
      .where(eq(canonicalSkills.slug, slug))
      .limit(1);
    if (bySlug[0]) {
      return bySlug[0];
    }
  }

  // 2. Check aliases array (any canonical_skills row where normalized name is in aliases)
  const byAlias = await db
    .select()
    .from(canonicalSkills)
    .where(sql`${normalized} = ANY(${canonicalSkills.aliases})`)
    .limit(1);
  if (byAlias[0]) {
    return byAlias[0];
  }

  // 3. No match -- add to unresolved queue. The upsert keys on normalized_name:
  //    a repeat sighting only bumps the occurrence counter.
  await db
    .insert(unresolvedSkills)
    .values({
      rawName,
      normalizedName: normalized,
    })
    .onConflictDoUpdate({
      target: unresolvedSkills.normalizedName,
      set: {
        occurrences: sql`${unresolvedSkills.occurrences} + 1`,
      },
    });

  logger.info({ rawName, normalized }, 'Skill queued as unresolved');
  return null;
}
+15
tests/db/schema-canonical-skills.test.ts
··· 1 + import { describe, it, expect } from 'vitest'; 2 + import { canonicalSkills } from '../../src/db/schema/index.js'; 3 + 4 + describe('canonical_skills schema', () => { 5 + it('has expected columns', () => { 6 + expect(canonicalSkills.id).toBeDefined(); 7 + expect(canonicalSkills.canonicalName).toBeDefined(); 8 + expect(canonicalSkills.slug).toBeDefined(); 9 + expect(canonicalSkills.category).toBeDefined(); 10 + expect(canonicalSkills.subcategory).toBeDefined(); 11 + expect(canonicalSkills.aliases).toBeDefined(); 12 + expect(canonicalSkills.wikidataId).toBeDefined(); 13 + expect(canonicalSkills.userCount).toBeDefined(); 14 + }); 15 + });
+11
tests/db/schema-skill-position-links.test.ts
··· 1 + import { describe, it, expect } from 'vitest'; 2 + import { skillPositionLinks } from '../../src/db/schema/index.js'; 3 + 4 + describe('skill_position_links schema', () => { 5 + it('has expected columns', () => { 6 + expect(skillPositionLinks.did).toBeDefined(); 7 + expect(skillPositionLinks.positionRkey).toBeDefined(); 8 + expect(skillPositionLinks.skillRkey).toBeDefined(); 9 + expect(skillPositionLinks.indexedAt).toBeDefined(); 10 + }); 11 + });
+14
tests/db/schema-unresolved-skills.test.ts
··· 1 + import { describe, it, expect } from 'vitest'; 2 + import { unresolvedSkills } from '../../src/db/schema/index.js'; 3 + 4 + describe('unresolved_skills schema', () => { 5 + it('has expected columns', () => { 6 + expect(unresolvedSkills.id).toBeDefined(); 7 + expect(unresolvedSkills.rawName).toBeDefined(); 8 + expect(unresolvedSkills.normalizedName).toBeDefined(); 9 + expect(unresolvedSkills.occurrences).toBeDefined(); 10 + expect(unresolvedSkills.firstSeenAt).toBeDefined(); 11 + expect(unresolvedSkills.resolvedAt).toBeDefined(); 12 + expect(unresolvedSkills.resolvedToId).toBeDefined(); 13 + }); 14 + });
+169
tests/jetstream/position-indexer-skills.test.ts
··· 1 + import { describe, it, expect, beforeAll, afterAll } from 'vitest'; 2 + import { createDb } from '../../src/db/index.js'; 3 + import { positions, profiles, skills, skillPositionLinks } from '../../src/db/schema/index.js'; 4 + import { createPositionIndexer } from '../../src/jetstream/indexers/position.js'; 5 + import { eq, and, sql } from 'drizzle-orm'; 6 + 7 + describe('Position indexer skill-position links', () => { 8 + const db = createDb(process.env.DATABASE_URL ?? 'postgresql://sifa:sifa@localhost:5432/sifa'); 9 + const testDid = 'did:plc:pos-skill-link-test'; 10 + 11 + beforeAll(async () => { 12 + await db 13 + .insert(profiles) 14 + .values({ 15 + did: testDid, 16 + handle: 'pos-skill-test.bsky.social', 17 + createdAt: new Date(), 18 + }) 19 + .onConflictDoNothing(); 20 + 21 + await db 22 + .insert(skills) 23 + .values({ 24 + did: testDid, 25 + rkey: '3skill1', 26 + skillName: 'TypeScript', 27 + createdAt: new Date(), 28 + }) 29 + .onConflictDoNothing(); 30 + 31 + await db 32 + .insert(skills) 33 + .values({ 34 + did: testDid, 35 + rkey: '3skill2', 36 + skillName: 'PostgreSQL', 37 + createdAt: new Date(), 38 + }) 39 + .onConflictDoNothing(); 40 + }); 41 + 42 + afterAll(async () => { 43 + await db.execute(sql`DELETE FROM skill_position_links WHERE did = ${testDid}`); 44 + await db.delete(positions).where(eq(positions.did, testDid)); 45 + await db.delete(skills).where(eq(skills.did, testDid)); 46 + await db.execute(sql`DELETE FROM profiles WHERE did = ${testDid}`); 47 + await db.$client.end(); 48 + }); 49 + 50 + it('creates skill-position links from position skills array', async () => { 51 + const indexer = createPositionIndexer(db); 52 + await indexer({ 53 + did: testDid, 54 + time_us: 1234567890, 55 + kind: 'commit', 56 + commit: { 57 + rev: 'rev1', 58 + operation: 'create', 59 + collection: 'id.sifa.profile.position', 60 + rkey: '3pos1', 61 + record: { 62 + companyName: 'Acme', 63 + title: 'Engineer', 64 + startDate: '2024-01', 65 + current: true, 
66 + createdAt: '2026-01-01T00:00:00Z', 67 + skills: [ 68 + { uri: `at://${testDid}/id.sifa.profile.skill/3skill1`, cid: 'bafyabc1' }, 69 + { uri: `at://${testDid}/id.sifa.profile.skill/3skill2`, cid: 'bafyabc2' }, 70 + ], 71 + }, 72 + }, 73 + }); 74 + 75 + const links = await db 76 + .select() 77 + .from(skillPositionLinks) 78 + .where( 79 + and(eq(skillPositionLinks.did, testDid), eq(skillPositionLinks.positionRkey, '3pos1')), 80 + ); 81 + expect(links).toHaveLength(2); 82 + expect(links.map((l) => l.skillRkey).sort()).toEqual(['3skill1', '3skill2']); 83 + }); 84 + 85 + it('replaces links on position update', async () => { 86 + const indexer = createPositionIndexer(db); 87 + await indexer({ 88 + did: testDid, 89 + time_us: 1234567891, 90 + kind: 'commit', 91 + commit: { 92 + rev: 'rev2', 93 + operation: 'update', 94 + collection: 'id.sifa.profile.position', 95 + rkey: '3pos1', 96 + record: { 97 + companyName: 'Acme', 98 + title: 'Senior Engineer', 99 + startDate: '2024-01', 100 + current: true, 101 + createdAt: '2026-01-01T00:00:00Z', 102 + skills: [{ uri: `at://${testDid}/id.sifa.profile.skill/3skill1`, cid: 'bafyabc1' }], 103 + }, 104 + }, 105 + }); 106 + 107 + const links = await db 108 + .select() 109 + .from(skillPositionLinks) 110 + .where( 111 + and(eq(skillPositionLinks.did, testDid), eq(skillPositionLinks.positionRkey, '3pos1')), 112 + ); 113 + expect(links).toHaveLength(1); 114 + expect(links[0].skillRkey).toBe('3skill1'); 115 + }); 116 + 117 + it('removes all links on position delete', async () => { 118 + const indexer = createPositionIndexer(db); 119 + await indexer({ 120 + did: testDid, 121 + time_us: 1234567892, 122 + kind: 'commit', 123 + commit: { 124 + rev: 'rev3', 125 + operation: 'delete', 126 + collection: 'id.sifa.profile.position', 127 + rkey: '3pos1', 128 + }, 129 + }); 130 + 131 + const links = await db 132 + .select() 133 + .from(skillPositionLinks) 134 + .where( 135 + and(eq(skillPositionLinks.did, testDid), 
eq(skillPositionLinks.positionRkey, '3pos1')), 136 + ); 137 + expect(links).toHaveLength(0); 138 + }); 139 + 140 + it('handles position with no skills array gracefully', async () => { 141 + const indexer = createPositionIndexer(db); 142 + await indexer({ 143 + did: testDid, 144 + time_us: 1234567893, 145 + kind: 'commit', 146 + commit: { 147 + rev: 'rev4', 148 + operation: 'create', 149 + collection: 'id.sifa.profile.position', 150 + rkey: '3pos2', 151 + record: { 152 + companyName: 'Other Corp', 153 + title: 'Dev', 154 + startDate: '2025-01', 155 + current: false, 156 + createdAt: '2026-01-01T00:00:00Z', 157 + }, 158 + }, 159 + }); 160 + 161 + const links = await db 162 + .select() 163 + .from(skillPositionLinks) 164 + .where( 165 + and(eq(skillPositionLinks.did, testDid), eq(skillPositionLinks.positionRkey, '3pos2')), 166 + ); 167 + expect(links).toHaveLength(0); 168 + }); 169 + });
+98
tests/jetstream/skill-indexer.test.ts
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
import { createDb } from '../../src/db/index.js';
import { skills, canonicalSkills, profiles } from '../../src/db/schema/index.js';
import { createSkillIndexer } from '../../src/jetstream/indexers/skill.js';
import { eq, and, sql } from 'drizzle-orm';

// Integration tests for the skill indexer: they run against a real database
// and the cases build on each other (create -> count check -> delete).
describe('Skill indexer with normalization', () => {
  const db = createDb(process.env.DATABASE_URL ?? 'postgresql://sifa:sifa@localhost:5432/sifa');
  const testDid = 'did:plc:skill-indexer-test';

  /** Read the current user_count of the seeded 'typescript' canonical skill. */
  async function readTypescriptUserCount(): Promise<number | undefined> {
    const rows = await db
      .select({ userCount: canonicalSkills.userCount })
      .from(canonicalSkills)
      .where(eq(canonicalSkills.slug, 'typescript'));
    return rows[0]?.userCount;
  }

  beforeAll(async () => {
    await db
      .insert(profiles)
      .values({
        did: testDid,
        handle: 'skill-test.bsky.social',
        createdAt: new Date(),
      })
      .onConflictDoNothing();

    // Canonical entry the indexed skill should resolve to via its 'ts' alias.
    await db
      .insert(canonicalSkills)
      .values({
        canonicalName: 'TypeScript',
        slug: 'typescript',
        category: 'technical',
        aliases: ['ts', 'typescript'],
        userCount: 0,
      })
      .onConflictDoNothing();
  });

  afterAll(async () => {
    // Delete skills first: they reference canonical_skills via canonical_skill_id.
    await db.delete(skills).where(eq(skills.did, testDid));
    await db.execute(sql`DELETE FROM canonical_skills WHERE slug = 'typescript'`);
    await db.execute(sql`DELETE FROM unresolved_skills WHERE normalized_name = 'ts'`);
    await db.execute(sql`DELETE FROM profiles WHERE did = ${testDid}`);
    await db.$client.end();
  });

  it('indexes skill and resolves to canonical entry', async () => {
    const indexer = createSkillIndexer(db);
    await indexer({
      did: testDid,
      time_us: 1234567890,
      kind: 'commit',
      commit: {
        rev: 'rev1',
        operation: 'create',
        collection: 'id.sifa.profile.skill',
        rkey: '3skilltest1',
        record: {
          skillName: 'TS',
          category: 'technical',
          createdAt: '2026-01-01T00:00:00Z',
        },
      },
    });

    const indexed = await db
      .select()
      .from(skills)
      .where(and(eq(skills.did, testDid), eq(skills.rkey, '3skilltest1')));
    expect(indexed).toHaveLength(1);
    expect(indexed[0]?.skillName).toBe('TS');
    expect(indexed[0]?.canonicalSkillId).toBeDefined();
    expect(indexed[0]?.canonicalSkillId).not.toBeNull();
  });

  it('increments user_count on canonical skill when resolved', async () => {
    const userCount = await readTypescriptUserCount();
    expect(userCount).toBeGreaterThanOrEqual(1);
  });

  it('deletes skill and decrements user_count', async () => {
    // Capture the count before the delete so the decrement itself is asserted,
    // not just the removal of the skills row.
    const countBefore = (await readTypescriptUserCount()) ?? 0;

    const indexer = createSkillIndexer(db);
    await indexer({
      did: testDid,
      time_us: 1234567891,
      kind: 'commit',
      commit: {
        rev: 'rev2',
        operation: 'delete',
        collection: 'id.sifa.profile.skill',
        rkey: '3skilltest1',
      },
    });

    const indexed = await db
      .select()
      .from(skills)
      .where(and(eq(skills.did, testDid), eq(skills.rkey, '3skilltest1')));
    expect(indexed).toHaveLength(0);

    // The indexer clamps the counter at zero (GREATEST(user_count - 1, 0)).
    const countAfter = await readTypescriptUserCount();
    expect(countAfter).toBe(Math.max(countBefore - 1, 0));
  });
});
+110
tests/routes/profile-skill-links.test.ts
··· 1 + import { describe, it, expect, beforeAll, afterAll } from 'vitest'; 2 + import { buildServer } from '../../src/server.js'; 3 + import { createDb } from '../../src/db/index.js'; 4 + import { profiles, positions, skills, skillPositionLinks } from '../../src/db/schema/index.js'; 5 + import { sql } from 'drizzle-orm'; 6 + import { writeFileSync, mkdirSync, rmSync } from 'node:fs'; 7 + import { tmpdir } from 'node:os'; 8 + import { join } from 'node:path'; 9 + import type { FastifyInstance } from 'fastify'; 10 + 11 + describe('Profile API -- skill-position links', () => { 12 + let app: FastifyInstance; 13 + const db = createDb(process.env.DATABASE_URL ?? 'postgresql://sifa:sifa@localhost:5432/sifa'); 14 + const tmpKeysDir = join(tmpdir(), `sifa-test-keys-profile-skills-${Date.now()}`); 15 + const jwksPath = join(tmpKeysDir, 'jwks.json'); 16 + const testDid = 'did:plc:profile-skill-link-test'; 17 + 18 + beforeAll(async () => { 19 + mkdirSync(tmpKeysDir, { recursive: true }); 20 + writeFileSync(jwksPath, JSON.stringify({ keys: [{ kty: 'EC', crv: 'P-256', kid: 'test' }] })); 21 + 22 + await db 23 + .insert(profiles) 24 + .values({ 25 + did: testDid, 26 + handle: 'skill-link-profile.bsky.social', 27 + headline: 'Test Profile', 28 + createdAt: new Date(), 29 + }) 30 + .onConflictDoNothing(); 31 + 32 + await db 33 + .insert(positions) 34 + .values({ 35 + did: testDid, 36 + rkey: '3pos1', 37 + companyName: 'Acme', 38 + title: 'Engineer', 39 + startDate: '2024-01', 40 + current: true, 41 + createdAt: new Date(), 42 + }) 43 + .onConflictDoNothing(); 44 + 45 + await db 46 + .insert(skills) 47 + .values({ 48 + did: testDid, 49 + rkey: '3skill1', 50 + skillName: 'TypeScript', 51 + category: 'technical', 52 + createdAt: new Date(), 53 + }) 54 + .onConflictDoNothing(); 55 + 56 + await db 57 + .insert(skillPositionLinks) 58 + .values({ 59 + did: testDid, 60 + positionRkey: '3pos1', 61 + skillRkey: '3skill1', 62 + }) 63 + .onConflictDoNothing(); 64 + 65 + app = await 
buildServer({ 66 + NODE_ENV: 'test', 67 + PORT: 0, 68 + PUBLIC_URL: 'http://localhost:3100', 69 + DATABASE_URL: process.env.DATABASE_URL ?? 'postgresql://sifa:sifa@localhost:5432/sifa', 70 + VALKEY_URL: 'redis://localhost:6379', 71 + SIFA_DID: 'did:plc:test', 72 + JETSTREAM_URL: 'wss://jetstream1.us-east.bsky.network/subscribe', 73 + OAUTH_JWKS_PATH: jwksPath, 74 + }); 75 + }); 76 + 77 + afterAll(async () => { 78 + await db.execute(sql`DELETE FROM skill_position_links WHERE did = ${testDid}`); 79 + await db.execute(sql`DELETE FROM positions WHERE did = ${testDid}`); 80 + await db.execute(sql`DELETE FROM skills WHERE did = ${testDid}`); 81 + await db.execute(sql`DELETE FROM profiles WHERE did = ${testDid}`); 82 + await db.$client.end(); 83 + await app.close(); 84 + rmSync(tmpKeysDir, { recursive: true }); 85 + }); 86 + 87 + it('includes positionRkeys on skills in profile response', async () => { 88 + const res = await app.inject({ 89 + method: 'GET', 90 + url: `/api/profile/${testDid}`, 91 + }); 92 + expect(res.statusCode).toBe(200); 93 + const body = res.json(); 94 + const skill = body.skills.find((s: { rkey: string }) => s.rkey === '3skill1'); 95 + expect(skill).toBeDefined(); 96 + expect(skill.positionRkeys).toEqual(['3pos1']); 97 + }); 98 + 99 + it('includes skillRkeys on positions in profile response', async () => { 100 + const res = await app.inject({ 101 + method: 'GET', 102 + url: `/api/profile/${testDid}`, 103 + }); 104 + expect(res.statusCode).toBe(200); 105 + const body = res.json(); 106 + const position = body.positions.find((p: { rkey: string }) => p.rkey === '3pos1'); 107 + expect(position).toBeDefined(); 108 + expect(position.skillRkeys).toEqual(['3skill1']); 109 + }); 110 + });
+143
tests/routes/skills.test.ts
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
import { buildServer } from '../../src/server.js';
import { createDb } from '../../src/db/index.js';
import { canonicalSkills } from '../../src/db/schema/index.js';
import { sql } from 'drizzle-orm';
import { writeFileSync, mkdirSync, rmSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import type { FastifyInstance } from 'fastify';

/** Subset of the search response body that these tests read. */
type SearchBody = {
  skills: { canonicalName: string; slug: string; category: string | null; userCount: number }[];
};

/**
 * Integration tests for `GET /api/skills/search`.
 *
 * Seeds five canonical skills, builds the server with `buildServer`, and exercises the
 * endpoint for name matches, alias matches, the `limit` parameter, the missing-query
 * error and the empty-result case.
 */
describe('Skills Search API', () => {
  // Stays undefined if `beforeAll` fails before the server is built.
  let app: FastifyInstance | undefined;
  const databaseUrl = process.env.DATABASE_URL ?? 'postgresql://sifa:sifa@localhost:5432/sifa';
  const db = createDb(databaseUrl);
  const tmpKeysDir = join(tmpdir(), `sifa-test-keys-skills-${Date.now()}`);
  const jwksPath = join(tmpKeysDir, 'jwks.json');

  /** Returns the server built in `beforeAll`, failing loudly if setup never completed. */
  const server = (): FastifyInstance => {
    if (app === undefined) {
      throw new Error('Fastify app was not initialised in beforeAll');
    }
    return app;
  };

  beforeAll(async () => {
    // The server config points OAUTH_JWKS_PATH at this throw-away key file.
    mkdirSync(tmpKeysDir, { recursive: true });
    writeFileSync(jwksPath, JSON.stringify({ keys: [{ kty: 'EC', crv: 'P-256', kid: 'test' }] }));

    // Ensure the table exists (migration may not have run in test mode).
    // Column constraints mirror drizzle/0012_canonical_skills.sql so that a database
    // bootstrapped here has the same shape as a migrated one: only "slug" is UNIQUE.
    await db.execute(sql`
      CREATE TABLE IF NOT EXISTS "canonical_skills" (
        "id" uuid PRIMARY KEY DEFAULT gen_random_uuid() NOT NULL,
        "canonical_name" text NOT NULL,
        "slug" text NOT NULL UNIQUE,
        "category" text,
        "subcategory" text,
        "aliases" text[] NOT NULL DEFAULT '{}',
        "wikidata_id" text,
        "user_count" integer NOT NULL DEFAULT 0
      )
    `);

    // Seed canonical skills for search. 'Java' and 'JavaScript' overlap on purpose so a
    // single query can return several rows.
    await db
      .insert(canonicalSkills)
      .values([
        {
          canonicalName: 'JavaScript',
          slug: 'javascript',
          category: 'technical',
          aliases: ['js', 'javascript'],
          userCount: 150,
        },
        {
          canonicalName: 'TypeScript',
          slug: 'typescript',
          category: 'technical',
          aliases: ['ts', 'typescript'],
          userCount: 120,
        },
        {
          canonicalName: 'Java',
          slug: 'java',
          category: 'technical',
          aliases: ['java'],
          userCount: 80,
        },
        {
          canonicalName: 'Python',
          slug: 'python',
          category: 'technical',
          aliases: ['python', 'py'],
          userCount: 200,
        },
        {
          canonicalName: 'Project Management',
          slug: 'project-management',
          category: 'business',
          aliases: ['project management', 'pm'],
          userCount: 50,
        },
      ])
      .onConflictDoNothing();

    app = await buildServer({
      NODE_ENV: 'test',
      PORT: 0,
      PUBLIC_URL: 'http://localhost:3100',
      DATABASE_URL: databaseUrl,
      VALKEY_URL: 'redis://localhost:6379',
      SIFA_DID: 'did:plc:test',
      JETSTREAM_URL: 'wss://jetstream1.us-east.bsky.network/subscribe',
      OAUTH_JWKS_PATH: jwksPath,
    });
  });

  afterAll(async () => {
    try {
      await db.execute(
        sql`DELETE FROM canonical_skills WHERE slug IN ('javascript', 'typescript', 'java', 'python', 'project-management')`,
      );
    } finally {
      // Always release resources, even when setup or the delete above failed;
      // `app` may be undefined here, and `force` tolerates a missing directory.
      await app?.close();
      await db.$client.end();
      rmSync(tmpKeysDir, { recursive: true, force: true });
    }
  });

  it('GET /api/skills/search returns matching skills ordered by similarity and user_count', async () => {
    const res = await server().inject({ method: 'GET', url: '/api/skills/search?q=java' });
    expect(res.statusCode).toBe(200);
    const body: SearchBody = res.json();
    expect(body.skills.length).toBeGreaterThanOrEqual(2);
    const names = body.skills.map((s) => s.canonicalName);
    expect(names).toContain('JavaScript');
    expect(names).toContain('Java');
  });

  it('GET /api/skills/search returns 400 without query', async () => {
    const res = await server().inject({ method: 'GET', url: '/api/skills/search' });
    expect(res.statusCode).toBe(400);
  });

  it('GET /api/skills/search returns empty for no matches', async () => {
    const res = await server().inject({
      method: 'GET',
      url: '/api/skills/search?q=xyznonexistent999',
    });
    expect(res.statusCode).toBe(200);
    const body: SearchBody = res.json();
    expect(body.skills).toHaveLength(0);
  });

  it('GET /api/skills/search respects limit parameter', async () => {
    const res = await server().inject({
      method: 'GET',
      url: '/api/skills/search?q=java&limit=1',
    });
    expect(res.statusCode).toBe(200);
    const body: SearchBody = res.json();
    expect(body.skills).toHaveLength(1);
  });

  it('GET /api/skills/search includes category and user_count', async () => {
    const res = await server().inject({ method: 'GET', url: '/api/skills/search?q=python' });
    const body: SearchBody = res.json();
    const python = body.skills.find((s) => s.canonicalName === 'Python');
    expect(python).toBeDefined();
    expect(python?.category).toBe('technical');
    expect(python?.userCount).toBe(200);
    expect(python?.slug).toBe('python');
  });

  it('GET /api/skills/search matches aliases', async () => {
    // 'pm' appears only in the aliases of 'Project Management', not in its name.
    const res = await server().inject({ method: 'GET', url: '/api/skills/search?q=pm' });
    const body: SearchBody = res.json();
    const pm = body.skills.find((s) => s.canonicalName === 'Project Management');
    expect(pm).toBeDefined();
  });
});
+64
tests/services/skill-normalization-db.test.ts
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
import { createDb } from '../../src/db/index.js';
import { canonicalSkills, unresolvedSkills } from '../../src/db/schema/index.js';
import { resolveSkill } from '../../src/services/skill-normalization.js';
import { eq, sql } from 'drizzle-orm';

/**
 * Integration tests for `resolveSkill` against a real database.
 *
 * Seeds one canonical skill ('JavaScript' with several aliases) and checks that raw
 * names resolve to it, and that an unknown name returns null while being recorded in
 * `unresolved_skills` with an occurrence counter.
 *
 * NOTE(review): tests/routes/skills.test.ts seeds and deletes the same 'javascript'
 * slug; if the runner executes test files in parallel against one database the two
 * suites can interfere -- confirm against the vitest configuration.
 */
describe('resolveSkill (integration)', () => {
  const db = createDb(process.env.DATABASE_URL ?? 'postgresql://sifa:sifa@localhost:5432/sifa');
  const unknownSkill = 'completely-unknown-skill-xyz';

  /** Deletes the unresolved-skill rows this suite may create. */
  const purgeUnresolved = async (): Promise<void> => {
    await db.execute(
      sql`DELETE FROM unresolved_skills WHERE normalized_name IN ('javascript', 'completely-unknown-skill-xyz')`,
    );
  };

  /** Loads the unresolved-skill rows recorded for the unknown test skill. */
  const findUnresolved = () =>
    db.select().from(unresolvedSkills).where(eq(unresolvedSkills.normalizedName, unknownSkill));

  beforeAll(async () => {
    // Start from a clean queue so rows left by an aborted earlier run cannot skew the
    // occurrence counts asserted below.
    await purgeUnresolved();

    await db
      .insert(canonicalSkills)
      .values({
        canonicalName: 'JavaScript',
        slug: 'javascript',
        category: 'technical',
        aliases: ['js', 'javascript', 'ecmascript', 'java script'],
        userCount: 0,
      })
      .onConflictDoNothing();
  });

  afterAll(async () => {
    try {
      await db.delete(canonicalSkills).where(eq(canonicalSkills.slug, 'javascript'));
      await purgeUnresolved();
    } finally {
      // Close the pool even if cleanup failed so the test process can exit.
      await db.$client.end();
    }
  });

  it('resolves an alias to canonical skill', async () => {
    const result = await resolveSkill(db, 'JS');
    expect(result).not.toBeNull();
    expect(result?.canonicalName).toBe('JavaScript');
    expect(result?.slug).toBe('javascript');
  });

  it('resolves exact canonical name', async () => {
    const result = await resolveSkill(db, 'JavaScript');
    expect(result).not.toBeNull();
    expect(result?.canonicalName).toBe('JavaScript');
  });

  it('returns null and queues unresolved skill when no match found', async () => {
    const result = await resolveSkill(db, unknownSkill);
    expect(result).toBeNull();

    const unresolved = await findUnresolved();
    expect(unresolved).toHaveLength(1);
    expect(unresolved[0]?.occurrences).toBe(1);
  });

  it('increments occurrence count for repeated unresolved skills', async () => {
    // Measure relative to the current count so this test also passes when run on its
    // own (no row yet counts as zero occurrences).
    const before = (await findUnresolved())[0]?.occurrences ?? 0;

    await resolveSkill(db, unknownSkill);

    const unresolved = await findUnresolved();
    expect(unresolved).toHaveLength(1);
    expect(unresolved[0]?.occurrences).toBe(before + 1);
  });
});
+36
tests/services/skill-normalization.test.ts
import { describe, it, expect } from 'vitest';
import { normalizeSkillName, createSlug } from '../../src/services/skill-normalization.js';

/** Unit tests for `normalizeSkillName`: the expected normalized form of raw skill names. */
describe('normalizeSkillName', () => {
  it('lowercases and trims input', () => {
    expect(normalizeSkillName(' React.js ')).toBe('react.js');
  });

  it('handles mixed case', () => {
    expect(normalizeSkillName('TypeScript')).toBe('typescript');
  });

  it('preserves dots and hyphens', () => {
    expect(normalizeSkillName('Node.js')).toBe('node.js');
    expect(normalizeSkillName('vue-router')).toBe('vue-router');
  });

  it('collapses multiple spaces', () => {
    // Build the run of spaces programmatically so the input really contains several
    // consecutive spaces and survives any whitespace-collapsing tooling.
    const spaced = `machine${' '.repeat(3)}learning`;
    expect(normalizeSkillName(spaced)).toBe('machine learning');
  });
});

/** Unit tests for `createSlug`: the expected URL-safe slug for a skill name. */
describe('createSlug', () => {
  it('creates url-safe slug from skill name', () => {
    expect(createSlug('React.js')).toBe('react-js');
  });

  it('handles spaces and special chars', () => {
    expect(createSlug('C++')).toBe('c-plus-plus');
    expect(createSlug('C#')).toBe('c-sharp');
  });

  it('collapses multiple hyphens', () => {
    expect(createSlug('Node.js / Express')).toBe('node-js-express');
  });
});