A social knowledge tool for researchers built on ATProto

formatting

Changed files
+68 -31
docs
architecture
src
modules
webapp
api-client
clients
+28 -12
docs/architecture/url_semantic_search.md
··· 108 ```typescript 109 export interface IVectorDatabase { 110 indexUrl(params: IndexUrlParams): Promise<Result<void>>; 111 - findSimilarUrls(params: FindSimilarUrlsParams): Promise<Result<UrlSearchResult[]>>; 112 deleteUrl(url: string): Promise<Result<void>>; 113 } 114 ··· 178 ``` 179 180 **Configuration:** 181 - `USE_IN_MEMORY_EVENTS=false` 182 - `VECTOR_DB_URL` configured (Pinecone, Weaviate, etc.) 183 - Separate search worker process ··· 217 ``` 218 219 **Configuration:** 220 - `USE_IN_MEMORY_EVENTS=false` 221 - Local vector DB via Docker (Weaviate/Qdrant) 222 - Both web app and search worker in same process ··· 250 ``` 251 252 **Configuration:** 253 - `USE_IN_MEMORY_EVENTS=true` 254 - `USE_MOCK_VECTOR_DB=true` 255 - No external vector DB required ··· 326 } 327 328 // 2. Enrich with library counts and user context 329 - const enrichedUrls = await this.enrichUrlsWithContext( 330 - similarResult.value, 331 - ); 332 333 return ok(enrichedUrls); 334 } ··· 363 // Generate embeddings and upsert to Pinecone 364 } 365 366 - async findSimilarUrls(params: FindSimilarUrlsParams): Promise<Result<UrlSearchResult[]>> { 367 // Query Pinecone for similar vectors 368 } 369 } ··· 379 // Index in local Weaviate instance 380 } 381 382 - async findSimilarUrls(params: FindSimilarUrlsParams): Promise<Result<UrlSearchResult[]>> { 383 // Query local Weaviate instance 384 } 385 } ··· 401 return ok(undefined); 402 } 403 404 - async findSimilarUrls(params: FindSimilarUrlsParams): Promise<Result<UrlSearchResult[]>> { 405 // Simple text similarity using keyword matching 406 const results = Array.from(this.urls.values()) 407 - .filter(indexed => this.calculateSimilarity(params.url, indexed.content) > 0.3) 408 - .map(indexed => ({ 409 url: indexed.url, 410 similarity: this.calculateSimilarity(params.url, indexed.content), 411 metadata: indexed.metadata, ··· 420 // Simple keyword-based similarity for mocking 421 const queryWords = query.toLowerCase().split(/\W+/); 422 const contentWords = content.toLowerCase().split(/\W+/); 423 - const intersection = queryWords.filter(word => contentWords.includes(word)); 424 - return intersection.length / Math.max(queryWords.length, contentWords.length); 425 } 426 } 427 ``` ··· 500 limit: parseInt(req.query.limit) || 10, 501 page: parseInt(req.query.page) || 1, 502 }); 503 - 504 res.json(result); 505 }); 506 ```
··· 108 ```typescript 109 export interface IVectorDatabase { 110 indexUrl(params: IndexUrlParams): Promise<Result<void>>; 111 + findSimilarUrls( 112 + params: FindSimilarUrlsParams, 113 + ): Promise<Result<UrlSearchResult[]>>; 114 deleteUrl(url: string): Promise<Result<void>>; 115 } 116 ··· 180 ``` 181 182 **Configuration:** 183 + 184 - `USE_IN_MEMORY_EVENTS=false` 185 - `VECTOR_DB_URL` configured (Pinecone, Weaviate, etc.) 186 - Separate search worker process ··· 220 ``` 221 222 **Configuration:** 223 + 224 - `USE_IN_MEMORY_EVENTS=false` 225 - Local vector DB via Docker (Weaviate/Qdrant) 226 - Both web app and search worker in same process ··· 254 ``` 255 256 **Configuration:** 257 + 258 - `USE_IN_MEMORY_EVENTS=true` 259 - `USE_MOCK_VECTOR_DB=true` 260 - No external vector DB required ··· 331 } 332 333 // 2. Enrich with library counts and user context 334 + const enrichedUrls = await this.enrichUrlsWithContext(similarResult.value); 335 336 return ok(enrichedUrls); 337 } ··· 366 // Generate embeddings and upsert to Pinecone 367 } 368 369 + async findSimilarUrls( 370 + params: FindSimilarUrlsParams, 371 + ): Promise<Result<UrlSearchResult[]>> { 372 // Query Pinecone for similar vectors 373 } 374 } ··· 384 // Index in local Weaviate instance 385 } 386 387 + async findSimilarUrls( 388 + params: FindSimilarUrlsParams, 389 + ): Promise<Result<UrlSearchResult[]>> { 390 // Query local Weaviate instance 391 } 392 } ··· 408 return ok(undefined); 409 } 410 411 + async findSimilarUrls( 412 + params: FindSimilarUrlsParams, 413 + ): Promise<Result<UrlSearchResult[]>> { 414 // Simple text similarity using keyword matching 415 const results = Array.from(this.urls.values()) 416 + .filter( 417 + (indexed) => 418 + this.calculateSimilarity(params.url, indexed.content) > 0.3, 419 + ) 420 + .map((indexed) => ({ 421 url: indexed.url, 422 similarity: this.calculateSimilarity(params.url, indexed.content), 423 metadata: indexed.metadata, ··· 432 // Simple keyword-based similarity for mocking 433 const queryWords = query.toLowerCase().split(/\W+/); 434 const contentWords = content.toLowerCase().split(/\W+/); 435 + const intersection = queryWords.filter((word) => 436 + contentWords.includes(word), 437 + ); 438 + return ( 439 + intersection.length / Math.max(queryWords.length, contentWords.length) 440 + ); 441 } 442 } 443 ``` ··· 516 limit: parseInt(req.query.limit) || 10, 517 page: parseInt(req.query.page) || 1, 518 }); 519 + 520 res.json(result); 521 }); 522 ```
+2 -1
src/modules/atproto/tests/infrastructure/InMemoryAppPasswordSessionRepository.ts
··· 14 15 public static getInstance(): InMemoryAppPasswordSessionRepository { 16 if (!InMemoryAppPasswordSessionRepository.instance) { 17 - InMemoryAppPasswordSessionRepository.instance = new InMemoryAppPasswordSessionRepository(); 18 } 19 return InMemoryAppPasswordSessionRepository.instance; 20 }
··· 14 15 public static getInstance(): InMemoryAppPasswordSessionRepository { 16 if (!InMemoryAppPasswordSessionRepository.instance) { 17 + InMemoryAppPasswordSessionRepository.instance = 18 + new InMemoryAppPasswordSessionRepository(); 19 } 20 return InMemoryAppPasswordSessionRepository.instance; 21 }
+2 -1
src/modules/cards/tests/utils/InMemoryCollectionRepository.ts
··· 13 14 public static getInstance(): InMemoryCollectionRepository { 15 if (!InMemoryCollectionRepository.instance) { 16 - InMemoryCollectionRepository.instance = new InMemoryCollectionRepository(); 17 } 18 return InMemoryCollectionRepository.instance; 19 }
··· 13 14 public static getInstance(): InMemoryCollectionRepository { 15 if (!InMemoryCollectionRepository.instance) { 16 + InMemoryCollectionRepository.instance = 17 + new InMemoryCollectionRepository(); 18 } 19 return InMemoryCollectionRepository.instance; 20 }
+8 -2
src/modules/search/application/eventHandlers/CardAddedToLibraryEventHandler.ts
··· 16 // Get card details to check if it's a URL card 17 const cardResult = await this.cardRepository.findById(event.cardId); 18 if (cardResult.isErr()) { 19 - console.error('Failed to find card for search indexing:', cardResult.error); 20 return ok(undefined); // Don't fail the event processing 21 } 22 23 const card = cardResult.value; 24 if (!card) { 25 - console.warn('Card not found for search indexing:', event.cardId.getStringValue()); 26 return ok(undefined); 27 } 28
··· 16 // Get card details to check if it's a URL card 17 const cardResult = await this.cardRepository.findById(event.cardId); 18 if (cardResult.isErr()) { 19 + console.error( 20 + 'Failed to find card for search indexing:', 21 + cardResult.error, 22 + ); 23 return ok(undefined); // Don't fail the event processing 24 } 25 26 const card = cardResult.value; 27 if (!card) { 28 + console.warn( 29 + 'Card not found for search indexing:', 30 + event.cardId.getStringValue(), 31 + ); 32 return ok(undefined); 33 } 34
+4 -1
src/modules/search/application/useCases/commands/IndexUrlForSearchUseCase.ts
··· 24 implements 25 UseCase< 26 IndexUrlForSearchDTO, 27 - Result<IndexUrlForSearchResponseDTO, ValidationError | AppError.UnexpectedError> 28 > 29 { 30 constructor(private searchService: SearchService) {}
··· 24 implements 25 UseCase< 26 IndexUrlForSearchDTO, 27 + Result< 28 + IndexUrlForSearchResponseDTO, 29 + ValidationError | AppError.UnexpectedError 30 + > 31 > 32 { 33 constructor(private searchService: SearchService) {}
+4 -1
src/modules/search/application/useCases/queries/GetSimilarUrlsForUrlUseCase.ts
··· 5 import { URL } from '../../../../cards/domain/value-objects/URL'; 6 import { SearchService } from '../../../domain/services/SearchService'; 7 import { GetSimilarUrlsForUrlParams } from '@semble/types/api/requests'; 8 - import { GetSimilarUrlsForUrlResponse, UrlView } from '@semble/types/api/responses'; 9 import { Pagination } from '@semble/types/api/common'; 10 11 export interface GetSimilarUrlsForUrlQuery extends GetSimilarUrlsForUrlParams {
··· 5 import { URL } from '../../../../cards/domain/value-objects/URL'; 6 import { SearchService } from '../../../domain/services/SearchService'; 7 import { GetSimilarUrlsForUrlParams } from '@semble/types/api/requests'; 8 + import { 9 + GetSimilarUrlsForUrlResponse, 10 + UrlView, 11 + } from '@semble/types/api/responses'; 12 import { Pagination } from '@semble/types/api/common'; 13 14 export interface GetSimilarUrlsForUrlQuery extends GetSimilarUrlsForUrlParams {
+3 -1
src/modules/search/domain/IVectorDatabase.ts
··· 33 /** 34 * Find URLs similar to the given URL 35 */ 36 - findSimilarUrls(params: FindSimilarUrlsParams): Promise<Result<UrlSearchResult[]>>; 37 38 /** 39 * Remove a URL from the search index
··· 33 /** 34 * Find URLs similar to the given URL 35 */ 36 + findSimilarUrls( 37 + params: FindSimilarUrlsParams, 38 + ): Promise<Result<UrlSearchResult[]>>; 39 40 /** 41 * Remove a URL from the search index
+4 -6
src/modules/search/domain/services/SearchService.ts
··· 124 const enrichedResults = await Promise.all( 125 searchResults.map(async (result) => { 126 // Get library information for this URL 127 - const librariesResult = await this.cardQueryRepository.getLibrariesForUrl( 128 - result.url, 129 - { 130 page: 1, 131 limit: 1000, // Get all libraries to count them 132 sortBy: 'createdAt' as any, // Type assertion needed due to enum mismatch 133 sortOrder: 'desc' as any, 134 - }, 135 - ); 136 137 const urlLibraryCount = librariesResult.totalCount; 138 - 139 // Check if calling user has this URL in their library 140 // Default to false if no calling user (unauthenticated request) 141 const urlInLibrary = callingUserId
··· 124 const enrichedResults = await Promise.all( 125 searchResults.map(async (result) => { 126 // Get library information for this URL 127 + const librariesResult = 128 + await this.cardQueryRepository.getLibrariesForUrl(result.url, { 129 page: 1, 130 limit: 1000, // Get all libraries to count them 131 sortBy: 'createdAt' as any, // Type assertion needed due to enum mismatch 132 sortOrder: 'desc' as any, 133 + }); 134 135 const urlLibraryCount = librariesResult.totalCount; 136 + 137 // Check if calling user has this URL in their library 138 // Default to false if no calling user (unauthenticated request) 139 const urlInLibrary = callingUserId
+8 -4
src/modules/search/infrastructure/InMemoryVectorDatabase.ts
··· 60 ): Promise<Result<UrlSearchResult[]>> { 61 try { 62 console.log('all urls to compare', this.urls); 63 - const threshold = params.threshold || 0.1; // Lower default threshold for more matches 64 const results: UrlSearchResult[] = []; 65 66 // Get the query URL's content for comparison ··· 78 indexed.content, 79 ); 80 81 - console.log(`Similarity between "${queryContent}" and "${indexed.content}": ${similarity}`); 82 83 if (similarity >= threshold) { 84 results.push({ ··· 93 results.sort((a, b) => b.similarity - a.similarity); 94 const limitedResults = results.slice(0, params.limit); 95 96 - console.log(`Found ${limitedResults.length} similar URLs above threshold ${threshold}`); 97 98 return ok(limitedResults); 99 } catch (error) { ··· 144 for (const word of new Set([...words1, ...words2])) { 145 const count1 = freq1.get(word) || 0; 146 const count2 = freq2.get(word) || 0; 147 - 148 if (count1 > 0 && count2 > 0) { 149 sharedWords += Math.min(count1, count2); 150 }
··· 60 ): Promise<Result<UrlSearchResult[]>> { 61 try { 62 console.log('all urls to compare', this.urls); 63 + const threshold = params.threshold || 0; // Lower default threshold for more matches 64 const results: UrlSearchResult[] = []; 65 66 // Get the query URL's content for comparison ··· 78 indexed.content, 79 ); 80 81 + console.log( 82 + `Similarity between "${queryContent}" and "${indexed.content}": ${similarity}`, 83 + ); 84 85 if (similarity >= threshold) { 86 results.push({ ··· 95 results.sort((a, b) => b.similarity - a.similarity); 96 const limitedResults = results.slice(0, params.limit); 97 98 + console.log( 99 + `Found ${limitedResults.length} similar URLs above threshold ${threshold}`, 100 + ); 101 102 return ok(limitedResults); 103 } catch (error) { ··· 148 for (const word of new Set([...words1, ...words2])) { 149 const count1 = freq1.get(word) || 0; 150 const count2 = freq2.get(word) || 0; 151 + 152 if (count1 > 0 && count2 > 0) { 153 sharedWords += Math.min(count1, count2); 154 }
+3 -1
src/modules/search/infrastructure/http/controllers/GetSimilarUrlsForUrlController.ts
··· 4 import { AuthenticatedRequest } from '../../../../../shared/infrastructure/http/middleware/AuthMiddleware'; 5 6 export class GetSimilarUrlsForUrlController extends Controller { 7 - constructor(private getSimilarUrlsForUrlUseCase: GetSimilarUrlsForUrlUseCase) { 8 super(); 9 } 10
··· 4 import { AuthenticatedRequest } from '../../../../../shared/infrastructure/http/middleware/AuthMiddleware'; 5 6 export class GetSimilarUrlsForUrlController extends Controller { 7 + constructor( 8 + private getSimilarUrlsForUrlUseCase: GetSimilarUrlsForUrlUseCase, 9 + ) { 10 super(); 11 } 12
+2 -1
src/webapp/api-client/clients/QueryClient.ts
··· 231 if (params.limit) searchParams.set('limit', params.limit.toString()); 232 if (params.sortBy) searchParams.set('sortBy', params.sortBy); 233 if (params.sortOrder) searchParams.set('sortOrder', params.sortOrder); 234 - if (params.threshold) searchParams.set('threshold', params.threshold.toString()); 235 236 return this.request<GetSimilarUrlsForUrlResponse>( 237 'GET',
··· 231 if (params.limit) searchParams.set('limit', params.limit.toString()); 232 if (params.sortBy) searchParams.set('sortBy', params.sortBy); 233 if (params.sortOrder) searchParams.set('sortOrder', params.sortOrder); 234 + if (params.threshold) 235 + searchParams.set('threshold', params.threshold.toString()); 236 237 return this.request<GetSimilarUrlsForUrlResponse>( 238 'GET',