A social knowledge tool for researchers built on ATProto
at main 142 lines 4.3 kB view raw
import { Index } from '@upstash/vector';
import { Result, ok, err } from '../../../shared/core/Result';
import {
  IVectorDatabase,
  IndexUrlParams,
  FindSimilarUrlsParams,
  UrlSearchResult,
} from '../domain/IVectorDatabase';
import { UrlMetadataProps } from '../../cards/domain/value-objects/UrlMetadata';

/**
 * Metadata stored alongside every indexed URL.
 *
 * The open index signature is kept from the original declaration
 * ("for additional flexibility"); presumably it is also what lets this
 * interface satisfy the metadata constraint of `Index<T>` — confirm before
 * narrowing it.
 */
interface UpstashMetadata extends UrlMetadataProps {
  [key: string]: any;
}

/** How many candidates to request per wanted result, to leave room for filtering. */
const OVERFETCH_FACTOR = 10;

/** Hard upper bound on the number of candidates requested in one query. */
const MAX_TOP_K = 100;

/** Extracts a printable message from whatever value a `catch` clause received. */
function errorMessage(error: unknown): string {
  return error instanceof Error ? error.message : 'Unknown error';
}

/**
 * `IVectorDatabase` implementation backed by an Upstash Vector index.
 *
 * URLs are used as record ids. Every method reports failure through the
 * returned `Result` instead of throwing.
 */
export class UpstashVectorDatabase implements IVectorDatabase {
  private readonly index: Index<UpstashMetadata>;

  /**
   * @param url - Endpoint of the Upstash Vector index.
   * @param token - Access token for that index.
   */
  constructor(url: string, token: string) {
    this.index = new Index<UpstashMetadata>({
      url,
      token,
    });
  }

  /**
   * Upserts a URL into the index, keyed by the URL itself.
   *
   * The `data` field is the title and description joined by a space; when
   * both are empty the URL string is used instead so the record still has
   * content.
   *
   * @param params - URL plus the metadata fields to store with it.
   * @returns `ok(undefined)` on success, `err` wrapping the failure message otherwise.
   */
  async indexUrl(params: IndexUrlParams): Promise<Result<void>> {
    try {
      const dataContent = [params.title, params.description]
        .filter(Boolean)
        .join(' ');

      // NOTE(review): metadata presumably round-trips through JSON, so a
      // non-JSON value (e.g. a Date in publishedDate) would not come back
      // unchanged from a query — confirm against UrlMetadataProps.
      await this.index.upsert({
        id: params.url,
        data: dataContent || params.url,
        metadata: {
          url: params.url,
          title: params.title,
          description: params.description,
          author: params.author,
          publishedDate: params.publishedDate,
          siteName: params.siteName,
          imageUrl: params.imageUrl,
          type: params.type,
        },
      });
      return ok(undefined);
    } catch (error) {
      return err(new Error(`Failed to index URL: ${errorMessage(error)}`));
    }
  }

  /**
   * Finds indexed URLs similar to `params.url`.
   *
   * The URL string itself is used as the query text (not the title and
   * description it was indexed with). Up to `limit * 10` candidates, capped
   * at 100, are requested; the query URL and anything scoring below
   * `threshold` are dropped, and the best `limit` of the rest are returned
   * in descending similarity order.
   *
   * @param params - Query URL, maximum result count and optional minimum score
   *   (defaults to 0).
   * @returns `ok` with the matches — an empty list when `limit` is not a
   *   positive number — or `err` wrapping the failure message.
   */
  async findSimilarUrls(
    params: FindSimilarUrlsParams,
  ): Promise<Result<UrlSearchResult[]>> {
    // A non-positive (or NaN) limit can never yield results; answer locally
    // instead of sending an invalid topK to the service. Written as a negated
    // comparison so NaN is caught as well.
    if (!(params.limit > 0)) {
      const none: UrlSearchResult[] = [];
      return ok(none);
    }

    try {
      const queryData = params.url;

      // Naive pagination: over-fetch so filtering still leaves enough rows.
      const topK = Math.min(params.limit * OVERFETCH_FACTOR, MAX_TOP_K);

      const queryResult = await this.index.query({
        data: queryData,
        topK,
        includeMetadata: true,
        includeVectors: false, // vectors are not needed in the response
      });

      // `??` rather than `||`: only a missing threshold falls back to 0.
      const threshold = params.threshold ?? 0;
      const results: UrlSearchResult[] = [];

      for (const result of queryResult) {
        // The query URL is not a match for itself.
        if (result.id === params.url) continue;

        if (result.score < threshold) continue;

        // Convert rather than cast: URLs are stored as ids, but the id is
        // not guaranteed to be typed as a string.
        const id = String(result.id);

        results.push({
          url: id,
          similarity: result.score,
          metadata: {
            url: result.metadata?.url || id,
            title: result.metadata?.title,
            description: result.metadata?.description,
            author: result.metadata?.author,
            publishedDate: result.metadata?.publishedDate,
            siteName: result.metadata?.siteName,
            imageUrl: result.metadata?.imageUrl,
            type: result.metadata?.type,
          },
        });
      }

      // Sort explicitly so the ordering does not depend on the service's
      // response order, then trim to the requested size.
      results.sort((a, b) => b.similarity - a.similarity);
      const limitedResults = results.slice(0, params.limit);

      return ok(limitedResults);
    } catch (error) {
      return err(
        new Error(`Failed to find similar URLs: ${errorMessage(error)}`),
      );
    }
  }

  /**
   * Removes the record whose id is `url` from the index.
   *
   * @param url - URL (record id) to delete.
   * @returns `ok(undefined)` on success, `err` wrapping the failure message otherwise.
   */
  async deleteUrl(url: string): Promise<Result<void>> {
    try {
      await this.index.delete(url);
      return ok(undefined);
    } catch (error) {
      return err(new Error(`Failed to delete URL: ${errorMessage(error)}`));
    }
  }

  /**
   * Checks that the index is reachable by requesting its info.
   *
   * @returns `ok(true)` when the info call succeeds, `err` wrapping the
   *   failure message otherwise.
   */
  async healthCheck(): Promise<Result<boolean>> {
    try {
      await this.index.info();
      return ok(true);
    } catch (error) {
      return err(new Error(`Health check failed: ${errorMessage(error)}`));
    }
  }
}