+28
-12
docs/architecture/url_semantic_search.md
+28
-12
docs/architecture/url_semantic_search.md
···
108
108
```typescript
109
109
export interface IVectorDatabase {
110
110
indexUrl(params: IndexUrlParams): Promise<Result<void>>;
111
-
findSimilarUrls(params: FindSimilarUrlsParams): Promise<Result<UrlSearchResult[]>>;
111
+
findSimilarUrls(
112
+
params: FindSimilarUrlsParams,
113
+
): Promise<Result<UrlSearchResult[]>>;
112
114
deleteUrl(url: string): Promise<Result<void>>;
113
115
}
114
116
···
178
180
```
179
181
180
182
**Configuration:**
183
+
181
184
- `USE_IN_MEMORY_EVENTS=false`
182
185
- `VECTOR_DB_URL` configured (Pinecone, Weaviate, etc.)
183
186
- Separate search worker process
···
217
220
```
218
221
219
222
**Configuration:**
223
+
220
224
- `USE_IN_MEMORY_EVENTS=false`
221
225
- Local vector DB via Docker (Weaviate/Qdrant)
222
226
- Both web app and search worker in same process
···
250
254
```
251
255
252
256
**Configuration:**
257
+
253
258
- `USE_IN_MEMORY_EVENTS=true`
254
259
- `USE_MOCK_VECTOR_DB=true`
255
260
- No external vector DB required
···
326
331
}
327
332
328
333
// 2. Enrich with library counts and user context
329
-
const enrichedUrls = await this.enrichUrlsWithContext(
330
-
similarResult.value,
331
-
);
334
+
const enrichedUrls = await this.enrichUrlsWithContext(similarResult.value);
332
335
333
336
return ok(enrichedUrls);
334
337
}
···
363
366
// Generate embeddings and upsert to Pinecone
364
367
}
365
368
366
-
async findSimilarUrls(params: FindSimilarUrlsParams): Promise<Result<UrlSearchResult[]>> {
369
+
async findSimilarUrls(
370
+
params: FindSimilarUrlsParams,
371
+
): Promise<Result<UrlSearchResult[]>> {
367
372
// Query Pinecone for similar vectors
368
373
}
369
374
}
···
379
384
// Index in local Weaviate instance
380
385
}
381
386
382
-
async findSimilarUrls(params: FindSimilarUrlsParams): Promise<Result<UrlSearchResult[]>> {
387
+
async findSimilarUrls(
388
+
params: FindSimilarUrlsParams,
389
+
): Promise<Result<UrlSearchResult[]>> {
383
390
// Query local Weaviate instance
384
391
}
385
392
}
···
401
408
return ok(undefined);
402
409
}
403
410
404
-
async findSimilarUrls(params: FindSimilarUrlsParams): Promise<Result<UrlSearchResult[]>> {
411
+
async findSimilarUrls(
412
+
params: FindSimilarUrlsParams,
413
+
): Promise<Result<UrlSearchResult[]>> {
405
414
// Simple text similarity using keyword matching
406
415
const results = Array.from(this.urls.values())
407
-
.filter(indexed => this.calculateSimilarity(params.url, indexed.content) > 0.3)
408
-
.map(indexed => ({
416
+
.filter(
417
+
(indexed) =>
418
+
this.calculateSimilarity(params.url, indexed.content) > 0.3,
419
+
)
420
+
.map((indexed) => ({
409
421
url: indexed.url,
410
422
similarity: this.calculateSimilarity(params.url, indexed.content),
411
423
metadata: indexed.metadata,
···
420
432
// Simple keyword-based similarity for mocking
421
433
const queryWords = query.toLowerCase().split(/\W+/);
422
434
const contentWords = content.toLowerCase().split(/\W+/);
423
-
const intersection = queryWords.filter(word => contentWords.includes(word));
424
-
return intersection.length / Math.max(queryWords.length, contentWords.length);
435
+
const intersection = queryWords.filter((word) =>
436
+
contentWords.includes(word),
437
+
);
438
+
return (
439
+
intersection.length / Math.max(queryWords.length, contentWords.length)
440
+
);
425
441
}
426
442
}
427
443
```
···
500
516
limit: parseInt(req.query.limit) || 10,
501
517
page: parseInt(req.query.page) || 1,
502
518
});
503
-
519
+
504
520
res.json(result);
505
521
});
506
522
```
+2
-1
src/modules/atproto/tests/infrastructure/InMemoryAppPasswordSessionRepository.ts
+2
-1
src/modules/atproto/tests/infrastructure/InMemoryAppPasswordSessionRepository.ts
···
14
14
15
15
public static getInstance(): InMemoryAppPasswordSessionRepository {
16
16
if (!InMemoryAppPasswordSessionRepository.instance) {
17
-
InMemoryAppPasswordSessionRepository.instance = new InMemoryAppPasswordSessionRepository();
17
+
InMemoryAppPasswordSessionRepository.instance =
18
+
new InMemoryAppPasswordSessionRepository();
18
19
}
19
20
return InMemoryAppPasswordSessionRepository.instance;
20
21
}
+2
-1
src/modules/cards/tests/utils/InMemoryCollectionRepository.ts
+2
-1
src/modules/cards/tests/utils/InMemoryCollectionRepository.ts
···
13
13
14
14
public static getInstance(): InMemoryCollectionRepository {
15
15
if (!InMemoryCollectionRepository.instance) {
16
-
InMemoryCollectionRepository.instance = new InMemoryCollectionRepository();
16
+
InMemoryCollectionRepository.instance =
17
+
new InMemoryCollectionRepository();
17
18
}
18
19
return InMemoryCollectionRepository.instance;
19
20
}
+8
-2
src/modules/search/application/eventHandlers/CardAddedToLibraryEventHandler.ts
+8
-2
src/modules/search/application/eventHandlers/CardAddedToLibraryEventHandler.ts
···
16
16
// Get card details to check if it's a URL card
17
17
const cardResult = await this.cardRepository.findById(event.cardId);
18
18
if (cardResult.isErr()) {
19
-
console.error('Failed to find card for search indexing:', cardResult.error);
19
+
console.error(
20
+
'Failed to find card for search indexing:',
21
+
cardResult.error,
22
+
);
20
23
return ok(undefined); // Don't fail the event processing
21
24
}
22
25
23
26
const card = cardResult.value;
24
27
if (!card) {
25
-
console.warn('Card not found for search indexing:', event.cardId.getStringValue());
28
+
console.warn(
29
+
'Card not found for search indexing:',
30
+
event.cardId.getStringValue(),
31
+
);
26
32
return ok(undefined);
27
33
}
28
34
+4
-1
src/modules/search/application/useCases/commands/IndexUrlForSearchUseCase.ts
+4
-1
src/modules/search/application/useCases/commands/IndexUrlForSearchUseCase.ts
···
24
24
implements
25
25
UseCase<
26
26
IndexUrlForSearchDTO,
27
-
Result<IndexUrlForSearchResponseDTO, ValidationError | AppError.UnexpectedError>
27
+
Result<
28
+
IndexUrlForSearchResponseDTO,
29
+
ValidationError | AppError.UnexpectedError
30
+
>
28
31
>
29
32
{
30
33
constructor(private searchService: SearchService) {}
+4
-1
src/modules/search/application/useCases/queries/GetSimilarUrlsForUrlUseCase.ts
+4
-1
src/modules/search/application/useCases/queries/GetSimilarUrlsForUrlUseCase.ts
···
5
5
import { URL } from '../../../../cards/domain/value-objects/URL';
6
6
import { SearchService } from '../../../domain/services/SearchService';
7
7
import { GetSimilarUrlsForUrlParams } from '@semble/types/api/requests';
8
-
import { GetSimilarUrlsForUrlResponse, UrlView } from '@semble/types/api/responses';
8
+
import {
9
+
GetSimilarUrlsForUrlResponse,
10
+
UrlView,
11
+
} from '@semble/types/api/responses';
9
12
import { Pagination } from '@semble/types/api/common';
10
13
11
14
export interface GetSimilarUrlsForUrlQuery extends GetSimilarUrlsForUrlParams {
+3
-1
src/modules/search/domain/IVectorDatabase.ts
+3
-1
src/modules/search/domain/IVectorDatabase.ts
···
33
33
/**
34
34
* Find URLs similar to the given URL
35
35
*/
36
-
findSimilarUrls(params: FindSimilarUrlsParams): Promise<Result<UrlSearchResult[]>>;
36
+
findSimilarUrls(
37
+
params: FindSimilarUrlsParams,
38
+
): Promise<Result<UrlSearchResult[]>>;
37
39
38
40
/**
39
41
* Remove a URL from the search index
+4
-6
src/modules/search/domain/services/SearchService.ts
+4
-6
src/modules/search/domain/services/SearchService.ts
···
124
124
const enrichedResults = await Promise.all(
125
125
searchResults.map(async (result) => {
126
126
// Get library information for this URL
127
-
const librariesResult = await this.cardQueryRepository.getLibrariesForUrl(
128
-
result.url,
129
-
{
127
+
const librariesResult =
128
+
await this.cardQueryRepository.getLibrariesForUrl(result.url, {
130
129
page: 1,
131
130
limit: 1000, // Get all libraries to count them
132
131
sortBy: 'createdAt' as any, // Type assertion needed due to enum mismatch
133
132
sortOrder: 'desc' as any,
134
-
},
135
-
);
133
+
});
136
134
137
135
const urlLibraryCount = librariesResult.totalCount;
138
-
136
+
139
137
// Check if calling user has this URL in their library
140
138
// Default to false if no calling user (unauthenticated request)
141
139
const urlInLibrary = callingUserId
+8
-4
src/modules/search/infrastructure/InMemoryVectorDatabase.ts
+8
-4
src/modules/search/infrastructure/InMemoryVectorDatabase.ts
···
60
60
): Promise<Result<UrlSearchResult[]>> {
61
61
try {
62
62
console.log('all urls to compare', this.urls);
63
-
const threshold = params.threshold || 0.1; // Lower default threshold for more matches
63
+
const threshold = params.threshold || 0; // Lower default threshold for more matches
64
64
const results: UrlSearchResult[] = [];
65
65
66
66
// Get the query URL's content for comparison
···
78
78
indexed.content,
79
79
);
80
80
81
-
console.log(`Similarity between "${queryContent}" and "${indexed.content}": ${similarity}`);
81
+
console.log(
82
+
`Similarity between "${queryContent}" and "${indexed.content}": ${similarity}`,
83
+
);
82
84
83
85
if (similarity >= threshold) {
84
86
results.push({
···
93
95
results.sort((a, b) => b.similarity - a.similarity);
94
96
const limitedResults = results.slice(0, params.limit);
95
97
96
-
console.log(`Found ${limitedResults.length} similar URLs above threshold ${threshold}`);
98
+
console.log(
99
+
`Found ${limitedResults.length} similar URLs above threshold ${threshold}`,
100
+
);
97
101
98
102
return ok(limitedResults);
99
103
} catch (error) {
···
144
148
for (const word of new Set([...words1, ...words2])) {
145
149
const count1 = freq1.get(word) || 0;
146
150
const count2 = freq2.get(word) || 0;
147
-
151
+
148
152
if (count1 > 0 && count2 > 0) {
149
153
sharedWords += Math.min(count1, count2);
150
154
}
+3
-1
src/modules/search/infrastructure/http/controllers/GetSimilarUrlsForUrlController.ts
+3
-1
src/modules/search/infrastructure/http/controllers/GetSimilarUrlsForUrlController.ts
···
4
4
import { AuthenticatedRequest } from '../../../../../shared/infrastructure/http/middleware/AuthMiddleware';
5
5
6
6
export class GetSimilarUrlsForUrlController extends Controller {
7
-
constructor(private getSimilarUrlsForUrlUseCase: GetSimilarUrlsForUrlUseCase) {
7
+
constructor(
8
+
private getSimilarUrlsForUrlUseCase: GetSimilarUrlsForUrlUseCase,
9
+
) {
8
10
super();
9
11
}
10
12
+2
-1
src/webapp/api-client/clients/QueryClient.ts
+2
-1
src/webapp/api-client/clients/QueryClient.ts
···
231
231
if (params.limit) searchParams.set('limit', params.limit.toString());
232
232
if (params.sortBy) searchParams.set('sortBy', params.sortBy);
233
233
if (params.sortOrder) searchParams.set('sortOrder', params.sortOrder);
234
-
if (params.threshold) searchParams.set('threshold', params.threshold.toString());
234
+
if (params.threshold)
235
+
searchParams.set('threshold', params.threshold.toString());
235
236
236
237
return this.request<GetSimilarUrlsForUrlResponse>(
237
238
'GET',