+28
-12
docs/architecture/url_semantic_search.md
+28
-12
docs/architecture/url_semantic_search.md
···
108
```typescript
109
export interface IVectorDatabase {
110
indexUrl(params: IndexUrlParams): Promise<Result<void>>;
111
-
findSimilarUrls(params: FindSimilarUrlsParams): Promise<Result<UrlSearchResult[]>>;
112
deleteUrl(url: string): Promise<Result<void>>;
113
}
114
···
178
```
179
180
**Configuration:**
181
- `USE_IN_MEMORY_EVENTS=false`
182
- `VECTOR_DB_URL` configured (Pinecone, Weaviate, etc.)
183
- Separate search worker process
···
217
```
218
219
**Configuration:**
220
- `USE_IN_MEMORY_EVENTS=false`
221
- Local vector DB via Docker (Weaviate/Qdrant)
222
- Both web app and search worker in same process
···
250
```
251
252
**Configuration:**
253
- `USE_IN_MEMORY_EVENTS=true`
254
- `USE_MOCK_VECTOR_DB=true`
255
- No external vector DB required
···
326
}
327
328
// 2. Enrich with library counts and user context
329
-
const enrichedUrls = await this.enrichUrlsWithContext(
330
-
similarResult.value,
331
-
);
332
333
return ok(enrichedUrls);
334
}
···
363
// Generate embeddings and upsert to Pinecone
364
}
365
366
-
async findSimilarUrls(params: FindSimilarUrlsParams): Promise<Result<UrlSearchResult[]>> {
367
// Query Pinecone for similar vectors
368
}
369
}
···
379
// Index in local Weaviate instance
380
}
381
382
-
async findSimilarUrls(params: FindSimilarUrlsParams): Promise<Result<UrlSearchResult[]>> {
383
// Query local Weaviate instance
384
}
385
}
···
401
return ok(undefined);
402
}
403
404
-
async findSimilarUrls(params: FindSimilarUrlsParams): Promise<Result<UrlSearchResult[]>> {
405
// Simple text similarity using keyword matching
406
const results = Array.from(this.urls.values())
407
-
.filter(indexed => this.calculateSimilarity(params.url, indexed.content) > 0.3)
408
-
.map(indexed => ({
409
url: indexed.url,
410
similarity: this.calculateSimilarity(params.url, indexed.content),
411
metadata: indexed.metadata,
···
420
// Simple keyword-based similarity for mocking
421
const queryWords = query.toLowerCase().split(/\W+/);
422
const contentWords = content.toLowerCase().split(/\W+/);
423
-
const intersection = queryWords.filter(word => contentWords.includes(word));
424
-
return intersection.length / Math.max(queryWords.length, contentWords.length);
425
}
426
}
427
```
···
500
limit: parseInt(req.query.limit) || 10,
501
page: parseInt(req.query.page) || 1,
502
});
503
-
504
res.json(result);
505
});
506
```
···
108
```typescript
109
export interface IVectorDatabase { // Vector store contract: three async operations, each resolving to a Result
110
indexUrl(params: IndexUrlParams): Promise<Result<void>>;
111
+
findSimilarUrls( // the only operation whose Result carries data (a list of UrlSearchResult)
112
+
params: FindSimilarUrlsParams,
113
+
): Promise<Result<UrlSearchResult[]>>;
114
deleteUrl(url: string): Promise<Result<void>>; // takes the bare URL string rather than a params object
115
}
116
···
180
```
181
182
**Configuration:**
183
+
184
- `USE_IN_MEMORY_EVENTS=false`
185
- `VECTOR_DB_URL` configured (Pinecone, Weaviate, etc.)
186
- Separate search worker process
···
220
```
221
222
**Configuration:**
223
+
224
- `USE_IN_MEMORY_EVENTS=false`
225
- Local vector DB via Docker (Weaviate/Qdrant)
226
- Both web app and search worker in same process
···
254
```
255
256
**Configuration:**
257
+
258
- `USE_IN_MEMORY_EVENTS=true`
259
- `USE_MOCK_VECTOR_DB=true`
260
- No external vector DB required
···
331
}
332
333
// 2. Enrich with library counts and user context
334
+
const enrichedUrls = await this.enrichUrlsWithContext(similarResult.value);
335
336
return ok(enrichedUrls);
337
}
···
366
// Generate embeddings and upsert to Pinecone
367
}
368
369
+
async findSimilarUrls(
370
+
params: FindSimilarUrlsParams,
371
+
): Promise<Result<UrlSearchResult[]>> {
372
// Query Pinecone for similar vectors (placeholder: this sketch contains no implementation)
373
}
374
}
···
384
// Index in local Weaviate instance
385
}
386
387
+
async findSimilarUrls(
388
+
params: FindSimilarUrlsParams,
389
+
): Promise<Result<UrlSearchResult[]>> {
390
// Query local Weaviate instance (placeholder: this sketch contains no implementation)
391
}
392
}
···
408
return ok(undefined);
409
}
410
411
+
async findSimilarUrls(
412
+
params: FindSimilarUrlsParams,
413
+
): Promise<Result<UrlSearchResult[]>> {
414
// Simple text similarity using keyword matching
415
const results = Array.from(this.urls.values())
416
+
.filter(
417
+
(indexed) =>
418
+
this.calculateSimilarity(params.url, indexed.content) > 0.3,
419
+
)
420
+
.map((indexed) => ({
421
url: indexed.url,
422
similarity: this.calculateSimilarity(params.url, indexed.content),
423
metadata: indexed.metadata,
···
432
// Simple keyword-based similarity for mocking
433
const queryWords = query.toLowerCase().split(/\W+/);
434
const contentWords = content.toLowerCase().split(/\W+/);
435
+
const intersection = queryWords.filter((word) =>
436
+
contentWords.includes(word),
437
+
);
438
+
return (
439
+
intersection.length / Math.max(queryWords.length, contentWords.length)
440
+
);
441
}
442
}
443
```
···
516
limit: parseInt(req.query.limit) || 10,
517
page: parseInt(req.query.page) || 1,
518
});
519
+
520
res.json(result);
521
});
522
```
+2
-1
src/modules/atproto/tests/infrastructure/InMemoryAppPasswordSessionRepository.ts
+2
-1
src/modules/atproto/tests/infrastructure/InMemoryAppPasswordSessionRepository.ts
···
14
15
public static getInstance(): InMemoryAppPasswordSessionRepository { // Returns the shared repository instance, creating it on first use
16
if (!InMemoryAppPasswordSessionRepository.instance) { // nothing stored in the static field yet
17
+
InMemoryAppPasswordSessionRepository.instance =
18
+
new InMemoryAppPasswordSessionRepository();
19
}
20
return InMemoryAppPasswordSessionRepository.instance; // same object on every subsequent call
21
}
+2
-1
src/modules/cards/tests/utils/InMemoryCollectionRepository.ts
+2
-1
src/modules/cards/tests/utils/InMemoryCollectionRepository.ts
+8
-2
src/modules/search/application/eventHandlers/CardAddedToLibraryEventHandler.ts
+8
-2
src/modules/search/application/eventHandlers/CardAddedToLibraryEventHandler.ts
···
16
// Get card details to check if it's a URL card
17
const cardResult = await this.cardRepository.findById(event.cardId);
18
if (cardResult.isErr()) {
19
-
console.error('Failed to find card for search indexing:', cardResult.error);
20
return ok(undefined); // Don't fail the event processing
21
}
22
23
const card = cardResult.value;
24
if (!card) {
25
-
console.warn('Card not found for search indexing:', event.cardId.getStringValue());
26
return ok(undefined);
27
}
28
···
16
// Get card details to check if it's a URL card
17
const cardResult = await this.cardRepository.findById(event.cardId);
18
if (cardResult.isErr()) {
19
+
console.error(
20
+
'Failed to find card for search indexing:',
21
+
cardResult.error,
22
+
);
23
return ok(undefined); // Don't fail the event processing
24
}
25
26
const card = cardResult.value;
27
if (!card) {
28
+
console.warn(
29
+
'Card not found for search indexing:',
30
+
event.cardId.getStringValue(),
31
+
);
32
return ok(undefined);
33
}
34
+4
-1
src/modules/search/application/useCases/commands/IndexUrlForSearchUseCase.ts
+4
-1
src/modules/search/application/useCases/commands/IndexUrlForSearchUseCase.ts
+4
-1
src/modules/search/application/useCases/queries/GetSimilarUrlsForUrlUseCase.ts
+4
-1
src/modules/search/application/useCases/queries/GetSimilarUrlsForUrlUseCase.ts
···
5
import { URL } from '../../../../cards/domain/value-objects/URL';
6
import { SearchService } from '../../../domain/services/SearchService';
7
import { GetSimilarUrlsForUrlParams } from '@semble/types/api/requests';
8
-
import { GetSimilarUrlsForUrlResponse, UrlView } from '@semble/types/api/responses';
9
import { Pagination } from '@semble/types/api/common';
10
11
export interface GetSimilarUrlsForUrlQuery extends GetSimilarUrlsForUrlParams {
···
5
import { URL } from '../../../../cards/domain/value-objects/URL';
6
import { SearchService } from '../../../domain/services/SearchService';
7
import { GetSimilarUrlsForUrlParams } from '@semble/types/api/requests';
8
+
import {
9
+
GetSimilarUrlsForUrlResponse,
10
+
UrlView,
11
+
} from '@semble/types/api/responses';
12
import { Pagination } from '@semble/types/api/common';
13
14
export interface GetSimilarUrlsForUrlQuery extends GetSimilarUrlsForUrlParams {
+3
-1
src/modules/search/domain/IVectorDatabase.ts
+3
-1
src/modules/search/domain/IVectorDatabase.ts
+4
-6
src/modules/search/domain/services/SearchService.ts
+4
-6
src/modules/search/domain/services/SearchService.ts
···
124
const enrichedResults = await Promise.all(
125
searchResults.map(async (result) => {
126
// Get library information for this URL
127
-
const librariesResult = await this.cardQueryRepository.getLibrariesForUrl(
128
-
result.url,
129
-
{
130
page: 1,
131
limit: 1000, // Get all libraries to count them
132
sortBy: 'createdAt' as any, // Type assertion needed due to enum mismatch
133
sortOrder: 'desc' as any,
134
-
},
135
-
);
136
137
const urlLibraryCount = librariesResult.totalCount;
138
-
139
// Check if calling user has this URL in their library
140
// Default to false if no calling user (unauthenticated request)
141
const urlInLibrary = callingUserId
···
124
const enrichedResults = await Promise.all(
125
searchResults.map(async (result) => {
126
// Get library information for this URL
127
+
const librariesResult =
128
+
await this.cardQueryRepository.getLibrariesForUrl(result.url, {
129
page: 1,
130
limit: 1000, // Get all libraries to count them
131
sortBy: 'createdAt' as any, // Type assertion needed due to enum mismatch
132
sortOrder: 'desc' as any,
133
+
});
134
135
const urlLibraryCount = librariesResult.totalCount;
136
+
137
// Check if calling user has this URL in their library
138
// Default to false if no calling user (unauthenticated request)
139
const urlInLibrary = callingUserId
+8
-4
src/modules/search/infrastructure/InMemoryVectorDatabase.ts
+8
-4
src/modules/search/infrastructure/InMemoryVectorDatabase.ts
···
60
): Promise<Result<UrlSearchResult[]>> {
61
try {
62
console.log('all urls to compare', this.urls);
63
-
const threshold = params.threshold || 0.1; // Lower default threshold for more matches
64
const results: UrlSearchResult[] = [];
65
66
// Get the query URL's content for comparison
···
78
indexed.content,
79
);
80
81
-
console.log(`Similarity between "${queryContent}" and "${indexed.content}": ${similarity}`);
82
83
if (similarity >= threshold) {
84
results.push({
···
93
results.sort((a, b) => b.similarity - a.similarity);
94
const limitedResults = results.slice(0, params.limit);
95
96
-
console.log(`Found ${limitedResults.length} similar URLs above threshold ${threshold}`);
97
98
return ok(limitedResults);
99
} catch (error) {
···
144
for (const word of new Set([...words1, ...words2])) {
145
const count1 = freq1.get(word) || 0;
146
const count2 = freq2.get(word) || 0;
147
-
148
if (count1 > 0 && count2 > 0) {
149
sharedWords += Math.min(count1, count2);
150
}
···
60
): Promise<Result<UrlSearchResult[]>> {
61
try {
62
console.log('all urls to compare', this.urls);
63
+
const threshold = params.threshold || 0; // Lower default threshold for more matches
64
const results: UrlSearchResult[] = [];
65
66
// Get the query URL's content for comparison
···
78
indexed.content,
79
);
80
81
+
console.log(
82
+
`Similarity between "${queryContent}" and "${indexed.content}": ${similarity}`,
83
+
);
84
85
if (similarity >= threshold) {
86
results.push({
···
95
results.sort((a, b) => b.similarity - a.similarity);
96
const limitedResults = results.slice(0, params.limit);
97
98
+
console.log(
99
+
`Found ${limitedResults.length} similar URLs above threshold ${threshold}`,
100
+
);
101
102
return ok(limitedResults);
103
} catch (error) {
···
148
for (const word of new Set([...words1, ...words2])) {
149
const count1 = freq1.get(word) || 0;
150
const count2 = freq2.get(word) || 0;
151
+
152
if (count1 > 0 && count2 > 0) {
153
sharedWords += Math.min(count1, count2);
154
}
+3
-1
src/modules/search/infrastructure/http/controllers/GetSimilarUrlsForUrlController.ts
+3
-1
src/modules/search/infrastructure/http/controllers/GetSimilarUrlsForUrlController.ts
+2
-1
src/webapp/api-client/clients/QueryClient.ts
+2
-1
src/webapp/api-client/clients/QueryClient.ts
···
231
if (params.limit) searchParams.set('limit', params.limit.toString());
232
if (params.sortBy) searchParams.set('sortBy', params.sortBy);
233
if (params.sortOrder) searchParams.set('sortOrder', params.sortOrder);
234
-
if (params.threshold) searchParams.set('threshold', params.threshold.toString());
235
236
return this.request<GetSimilarUrlsForUrlResponse>(
237
'GET',
···
231
if (params.limit) searchParams.set('limit', params.limit.toString());
232
if (params.sortBy) searchParams.set('sortBy', params.sortBy);
233
if (params.sortOrder) searchParams.set('sortOrder', params.sortOrder);
234
+
if (params.threshold)
235
+
searchParams.set('threshold', params.threshold.toString());
236
237
return this.request<GetSimilarUrlsForUrlResponse>(
238
'GET',