A social knowledge tool for researchers built on ATProto

formatting

Changed files
+68 -31
docs
architecture
src
modules
webapp
api-client
clients
+28 -12
docs/architecture/url_semantic_search.md
··· 108 108 ```typescript 109 109 export interface IVectorDatabase { 110 110 indexUrl(params: IndexUrlParams): Promise<Result<void>>; 111 - findSimilarUrls(params: FindSimilarUrlsParams): Promise<Result<UrlSearchResult[]>>; 111 + findSimilarUrls( 112 + params: FindSimilarUrlsParams, 113 + ): Promise<Result<UrlSearchResult[]>>; 112 114 deleteUrl(url: string): Promise<Result<void>>; 113 115 } 114 116 ··· 178 180 ``` 179 181 180 182 **Configuration:** 183 + 181 184 - `USE_IN_MEMORY_EVENTS=false` 182 185 - `VECTOR_DB_URL` configured (Pinecone, Weaviate, etc.) 183 186 - Separate search worker process ··· 217 220 ``` 218 221 219 222 **Configuration:** 223 + 220 224 - `USE_IN_MEMORY_EVENTS=false` 221 225 - Local vector DB via Docker (Weaviate/Qdrant) 222 226 - Both web app and search worker in same process ··· 250 254 ``` 251 255 252 256 **Configuration:** 257 + 253 258 - `USE_IN_MEMORY_EVENTS=true` 254 259 - `USE_MOCK_VECTOR_DB=true` 255 260 - No external vector DB required ··· 326 331 } 327 332 328 333 // 2. Enrich with library counts and user context 329 - const enrichedUrls = await this.enrichUrlsWithContext( 330 - similarResult.value, 331 - ); 334 + const enrichedUrls = await this.enrichUrlsWithContext(similarResult.value); 332 335 333 336 return ok(enrichedUrls); 334 337 } ··· 363 366 // Generate embeddings and upsert to Pinecone 364 367 } 365 368 366 - async findSimilarUrls(params: FindSimilarUrlsParams): Promise<Result<UrlSearchResult[]>> { 369 + async findSimilarUrls( 370 + params: FindSimilarUrlsParams, 371 + ): Promise<Result<UrlSearchResult[]>> { 367 372 // Query Pinecone for similar vectors 368 373 } 369 374 } ··· 379 384 // Index in local Weaviate instance 380 385 } 381 386 382 - async findSimilarUrls(params: FindSimilarUrlsParams): Promise<Result<UrlSearchResult[]>> { 387 + async findSimilarUrls( 388 + params: FindSimilarUrlsParams, 389 + ): Promise<Result<UrlSearchResult[]>> { 383 390 // Query local Weaviate instance 384 391 } 385 392 } ··· 401 408 return ok(undefined); 402 409 } 403 410 404 - async findSimilarUrls(params: FindSimilarUrlsParams): Promise<Result<UrlSearchResult[]>> { 411 + async findSimilarUrls( 412 + params: FindSimilarUrlsParams, 413 + ): Promise<Result<UrlSearchResult[]>> { 405 414 // Simple text similarity using keyword matching 406 415 const results = Array.from(this.urls.values()) 407 - .filter(indexed => this.calculateSimilarity(params.url, indexed.content) > 0.3) 408 - .map(indexed => ({ 416 + .filter( 417 + (indexed) => 418 + this.calculateSimilarity(params.url, indexed.content) > 0.3, 419 + ) 420 + .map((indexed) => ({ 409 421 url: indexed.url, 410 422 similarity: this.calculateSimilarity(params.url, indexed.content), 411 423 metadata: indexed.metadata, ··· 420 432 // Simple keyword-based similarity for mocking 421 433 const queryWords = query.toLowerCase().split(/\W+/); 422 434 const contentWords = content.toLowerCase().split(/\W+/); 423 - const intersection = queryWords.filter(word => contentWords.includes(word)); 424 - return intersection.length / Math.max(queryWords.length, contentWords.length); 435 + const intersection = queryWords.filter((word) => 436 + contentWords.includes(word), 437 + ); 438 + return ( 439 + intersection.length / Math.max(queryWords.length, contentWords.length) 440 + ); 425 441 } 426 442 } 427 443 ``` ··· 500 516 limit: parseInt(req.query.limit) || 10, 501 517 page: parseInt(req.query.page) || 1, 502 518 }); 503 - 519 + 504 520 res.json(result); 505 521 }); 506 522 ```
+2 -1
src/modules/atproto/tests/infrastructure/InMemoryAppPasswordSessionRepository.ts
··· 14 14 15 15 public static getInstance(): InMemoryAppPasswordSessionRepository { 16 16 if (!InMemoryAppPasswordSessionRepository.instance) { 17 - InMemoryAppPasswordSessionRepository.instance = new InMemoryAppPasswordSessionRepository(); 17 + InMemoryAppPasswordSessionRepository.instance = 18 + new InMemoryAppPasswordSessionRepository(); 18 19 } 19 20 return InMemoryAppPasswordSessionRepository.instance; 20 21 }
+2 -1
src/modules/cards/tests/utils/InMemoryCollectionRepository.ts
··· 13 13 14 14 public static getInstance(): InMemoryCollectionRepository { 15 15 if (!InMemoryCollectionRepository.instance) { 16 - InMemoryCollectionRepository.instance = new InMemoryCollectionRepository(); 16 + InMemoryCollectionRepository.instance = 17 + new InMemoryCollectionRepository(); 17 18 } 18 19 return InMemoryCollectionRepository.instance; 19 20 }
+8 -2
src/modules/search/application/eventHandlers/CardAddedToLibraryEventHandler.ts
··· 16 16 // Get card details to check if it's a URL card 17 17 const cardResult = await this.cardRepository.findById(event.cardId); 18 18 if (cardResult.isErr()) { 19 - console.error('Failed to find card for search indexing:', cardResult.error); 19 + console.error( 20 + 'Failed to find card for search indexing:', 21 + cardResult.error, 22 + ); 20 23 return ok(undefined); // Don't fail the event processing 21 24 } 22 25 23 26 const card = cardResult.value; 24 27 if (!card) { 25 - console.warn('Card not found for search indexing:', event.cardId.getStringValue()); 28 + console.warn( 29 + 'Card not found for search indexing:', 30 + event.cardId.getStringValue(), 31 + ); 26 32 return ok(undefined); 27 33 } 28 34
+4 -1
src/modules/search/application/useCases/commands/IndexUrlForSearchUseCase.ts
··· 24 24 implements 25 25 UseCase< 26 26 IndexUrlForSearchDTO, 27 - Result<IndexUrlForSearchResponseDTO, ValidationError | AppError.UnexpectedError> 27 + Result< 28 + IndexUrlForSearchResponseDTO, 29 + ValidationError | AppError.UnexpectedError 30 + > 28 31 > 29 32 { 30 33 constructor(private searchService: SearchService) {}
+4 -1
src/modules/search/application/useCases/queries/GetSimilarUrlsForUrlUseCase.ts
··· 5 5 import { URL } from '../../../../cards/domain/value-objects/URL'; 6 6 import { SearchService } from '../../../domain/services/SearchService'; 7 7 import { GetSimilarUrlsForUrlParams } from '@semble/types/api/requests'; 8 - import { GetSimilarUrlsForUrlResponse, UrlView } from '@semble/types/api/responses'; 8 + import { 9 + GetSimilarUrlsForUrlResponse, 10 + UrlView, 11 + } from '@semble/types/api/responses'; 9 12 import { Pagination } from '@semble/types/api/common'; 10 13 11 14 export interface GetSimilarUrlsForUrlQuery extends GetSimilarUrlsForUrlParams {
+3 -1
src/modules/search/domain/IVectorDatabase.ts
··· 33 33 /** 34 34 * Find URLs similar to the given URL 35 35 */ 36 - findSimilarUrls(params: FindSimilarUrlsParams): Promise<Result<UrlSearchResult[]>>; 36 + findSimilarUrls( 37 + params: FindSimilarUrlsParams, 38 + ): Promise<Result<UrlSearchResult[]>>; 37 39 38 40 /** 39 41 * Remove a URL from the search index
+4 -6
src/modules/search/domain/services/SearchService.ts
··· 124 124 const enrichedResults = await Promise.all( 125 125 searchResults.map(async (result) => { 126 126 // Get library information for this URL 127 - const librariesResult = await this.cardQueryRepository.getLibrariesForUrl( 128 - result.url, 129 - { 127 + const librariesResult = 128 + await this.cardQueryRepository.getLibrariesForUrl(result.url, { 130 129 page: 1, 131 130 limit: 1000, // Get all libraries to count them 132 131 sortBy: 'createdAt' as any, // Type assertion needed due to enum mismatch 133 132 sortOrder: 'desc' as any, 134 - }, 135 - ); 133 + }); 136 134 137 135 const urlLibraryCount = librariesResult.totalCount; 138 - 136 + 139 137 // Check if calling user has this URL in their library 140 138 // Default to false if no calling user (unauthenticated request) 141 139 const urlInLibrary = callingUserId
+8 -4
src/modules/search/infrastructure/InMemoryVectorDatabase.ts
··· 60 60 ): Promise<Result<UrlSearchResult[]>> { 61 61 try { 62 62 console.log('all urls to compare', this.urls); 63 - const threshold = params.threshold || 0.1; // Lower default threshold for more matches 63 + const threshold = params.threshold || 0; // Lower default threshold for more matches 64 64 const results: UrlSearchResult[] = []; 65 65 66 66 // Get the query URL's content for comparison ··· 78 78 indexed.content, 79 79 ); 80 80 81 - console.log(`Similarity between "${queryContent}" and "${indexed.content}": ${similarity}`); 81 + console.log( 82 + `Similarity between "${queryContent}" and "${indexed.content}": ${similarity}`, 83 + ); 82 84 83 85 if (similarity >= threshold) { 84 86 results.push({ ··· 93 95 results.sort((a, b) => b.similarity - a.similarity); 94 96 const limitedResults = results.slice(0, params.limit); 95 97 96 - console.log(`Found ${limitedResults.length} similar URLs above threshold ${threshold}`); 98 + console.log( 99 + `Found ${limitedResults.length} similar URLs above threshold ${threshold}`, 100 + ); 97 101 98 102 return ok(limitedResults); 99 103 } catch (error) { ··· 144 148 for (const word of new Set([...words1, ...words2])) { 145 149 const count1 = freq1.get(word) || 0; 146 150 const count2 = freq2.get(word) || 0; 147 - 151 + 148 152 if (count1 > 0 && count2 > 0) { 149 153 sharedWords += Math.min(count1, count2); 150 154 }
+3 -1
src/modules/search/infrastructure/http/controllers/GetSimilarUrlsForUrlController.ts
··· 4 4 import { AuthenticatedRequest } from '../../../../../shared/infrastructure/http/middleware/AuthMiddleware'; 5 5 6 6 export class GetSimilarUrlsForUrlController extends Controller { 7 - constructor(private getSimilarUrlsForUrlUseCase: GetSimilarUrlsForUrlUseCase) { 7 + constructor( 8 + private getSimilarUrlsForUrlUseCase: GetSimilarUrlsForUrlUseCase, 9 + ) { 8 10 super(); 9 11 } 10 12
+2 -1
src/webapp/api-client/clients/QueryClient.ts
··· 231 231 if (params.limit) searchParams.set('limit', params.limit.toString()); 232 232 if (params.sortBy) searchParams.set('sortBy', params.sortBy); 233 233 if (params.sortOrder) searchParams.set('sortOrder', params.sortOrder); 234 - if (params.threshold) searchParams.set('threshold', params.threshold.toString()); 234 + if (params.threshold) 235 + searchParams.set('threshold', params.threshold.toString()); 235 236 236 237 return this.request<GetSimilarUrlsForUrlResponse>( 237 238 'GET',