tangled
alpha
login
or
join now
hailey.at
/
skyembed
1
fork
atom
A small service that creates embeddings of posts, profiles, and avatars and stores them in Qdrant
1
fork
atom
overview
issues
pulls
pipelines
add searching for avatars and posts
hailey.at
3 months ago
042f51cc
fbcdbc51
+86
-1
2 changed files
expand all
collapse all
unified
split
database.py
search.py
+19
database.py
reviewed
···
252
252
score=1.0,
253
253
)
254
254
255
255
+
def get_avatar_by_did(self, did: str) -> Optional[ResultWithVector]:
256
256
+
result = self.client.scroll(
257
257
+
collection_name=self.avatar_collection_name,
258
258
+
scroll_filter=Filter(
259
259
+
must=[FieldCondition(key="did", match=MatchValue(value=did))]
260
260
+
),
261
261
+
with_vectors=True,
262
262
+
with_payload=True,
263
263
+
)
264
264
+
265
265
+
if result and result[0] and len(result[0]) > 0:
266
266
+
point = result[0][0]
267
267
+
return ResultWithVector(
268
268
+
did=point.payload["did"],
269
269
+
payload=point.payload,
270
270
+
vector=point.vector,
271
271
+
score=1.0,
272
272
+
)
273
273
+
255
274
256
275
QDRANT_SERVICE = QdrantService()
+67
-1
search.py
reviewed
···
1
1
import logging
2
2
+
import math
2
3
import sys
3
4
from typing import List
4
5
import click
···
10
11
from config import CONFIG
11
12
from database import QDRANT_SERVICE, Result
12
13
from embedder import EMBEDDING_SERVICE
14
14
+
from retina import RETINA_CLIENT, binary_to_float_vector, hex_to_binary
13
15
14
16
15
17
logging.basicConfig(
···
65
67
more = None
66
68
if type == "profile":
67
69
more = result.payload.get("description")
70
70
+
elif type == "avatar":
71
71
+
cid = result.payload.get("cid")
72
72
+
more = f"https://cdn.bsky.app/img/feed_thumbnail/plain/{result.did}/{cid}@jpeg"
73
73
+
elif type == "post":
74
74
+
more = result.payload.get("text")
68
75
69
76
if more is not None:
70
77
row.append(more)
···
116
123
show_more: bool,
117
124
):
118
125
# TODO: would be nice if these were flags instead
119
119
-
if type != "profile":
126
126
+
if type not in ["profile", "avatar", "post"]:
120
127
raise Exception("invalid type")
121
128
122
129
QDRANT_SERVICE.initialize()
···
145
152
146
153
results = QDRANT_SERVICE.search_similar(
147
154
collection_name=CONFIG.qdrant_profile_collection_name,
155
155
+
query_vector=query_vector,
156
156
+
limit=limit,
157
157
+
score_threshold=math.sqrt(threshold),
158
158
+
)
159
159
+
160
160
+
display_results(type, description, results, show_more)
161
161
+
elif type == "avatar":
162
162
+
if not query:
163
163
+
console.print("[cyan]Looking up avatar...[/cyan]")
164
164
+
avatar = QDRANT_SERVICE.get_avatar_by_did(did)
165
165
+
166
166
+
if not avatar:
167
167
+
console.print(f"[red]Avatar not found: {did}[/red]")
168
168
+
sys.exit(1)
169
169
+
170
170
+
cid = avatar.payload.get("cid")
171
171
+
query_vector = avatar.vector
172
172
+
else:
173
173
+
pts = query.split("/")
174
174
+
175
175
+
if len(pts) != 8:
176
176
+
console.print("[red]Invalid avatar URL provided[/red]")
177
177
+
sys.exit(1)
178
178
+
179
179
+
did = pts[6]
180
180
+
cid = pts[7].split("@")[0]
181
181
+
182
182
+
resp = RETINA_CLIENT.get_image_hash(did, cid)
183
183
+
184
184
+
if resp.quality_too_low or resp.hash is None:
185
185
+
console.print("[red]Hash quality too low[/red]")
186
186
+
sys.exit(1)
187
187
+
188
188
+
query_vector = binary_to_float_vector(hex_to_binary(resp.hash))
189
189
+
190
190
+
console.print("[cyan]Looking up similar avatars...[/cyan]")
191
191
+
192
192
+
results = QDRANT_SERVICE.search_similar(
193
193
+
collection_name=CONFIG.qdrant_avatar_collection_name,
194
194
+
query_vector=query_vector,
195
195
+
limit=limit,
196
196
+
score_threshold=threshold,
197
197
+
)
198
198
+
199
199
+
display_results(type, cid, results, show_more)
200
200
+
elif type == "post":
201
201
+
if not query:
202
202
+
console.print("[red]Must supply input for post search[/red]")
203
203
+
sys.exit(1)
204
204
+
else:
205
205
+
EMBEDDING_SERVICE.initialize()
206
206
+
207
207
+
description = query
208
208
+
query_vector = EMBEDDING_SERVICE.encode(query)
209
209
+
210
210
+
console.print("[cyan]Looking up similar posts...[/cyan]")
211
211
+
212
212
+
results = QDRANT_SERVICE.search_similar(
213
213
+
collection_name=CONFIG.qdrant_post_collection_name,
148
214
query_vector=query_vector,
149
215
limit=limit,
150
216
score_threshold=threshold,