A small service that creates embeddings of posts, profiles, and avatars and stores them in Qdrant

Add search support for avatars and posts

+86 -1
+19
database.py
def get_avatar_by_did(self, did: str) -> Optional[ResultWithVector]:
    """Look up the stored avatar point for a DID, including its vector.

    Scrolls the avatar collection with an exact-match filter on the ``did``
    payload field and wraps the first hit.

    Args:
        did: DID whose avatar record should be fetched.

    Returns:
        A ``ResultWithVector`` built from the first matching point (with a
        fixed ``score`` of 1.0), or ``None`` when no point matches.
    """
    result = self.client.scroll(
        collection_name=self.avatar_collection_name,
        scroll_filter=Filter(
            must=[FieldCondition(key="did", match=MatchValue(value=did))]
        ),
        # Only the first hit is ever used, so avoid pulling a full
        # default-sized page of vectors and payloads.
        limit=1,
        with_vectors=True,
        with_payload=True,
    )

    # The first element of the scroll result is the list of matching points;
    # an empty list means the DID has no stored avatar.
    points = result[0] if result else None
    if not points:
        return None

    point = points[0]
    return ResultWithVector(
        did=point.payload["did"],
        payload=point.payload,
        vector=point.vector,
        score=1.0,
    )
+67 -1
search.py
··· 1 1 import logging 2 + import math 2 3 import sys 3 4 from typing import List 4 5 import click ··· 10 11 from config import CONFIG 11 12 from database import QDRANT_SERVICE, Result 12 13 from embedder import EMBEDDING_SERVICE 14 + from retina import RETINA_CLIENT, binary_to_float_vector, hex_to_binary 13 15 14 16 15 17 logging.basicConfig( ··· 65 67 more = None 66 68 if type == "profile": 67 69 more = result.payload.get("description") 70 + elif type == "avatar": 71 + cid = result.payload.get("cid") 72 + more = f"https://cdn.bsky.app/img/feed_thumbnail/plain/{result.did}/{cid}@jpeg" 73 + elif type == "post": 74 + more = result.payload.get("text") 68 75 69 76 if more is not None: 70 77 row.append(more) ··· 116 123 show_more: bool, 117 124 ): 118 125 # TODO: would be nice if these were flags instead 119 - if type != "profile": 126 + if type not in ["profile", "avatar", "post"]: 120 127 raise Exception("invalid type") 121 128 122 129 QDRANT_SERVICE.initialize() ··· 145 152 146 153 results = QDRANT_SERVICE.search_similar( 147 154 collection_name=CONFIG.qdrant_profile_collection_name, 155 + query_vector=query_vector, 156 + limit=limit, 157 + score_threshold=math.sqrt(threshold), 158 + ) 159 + 160 + display_results(type, description, results, show_more) 161 + elif type == "avatar": 162 + if not query: 163 + console.print("[cyan]Looking up avatar...[/cyan]") 164 + avatar = QDRANT_SERVICE.get_avatar_by_did(did) 165 + 166 + if not avatar: 167 + console.print(f"[red]Avatar not found: {did}[/red]") 168 + sys.exit(1) 169 + 170 + cid = avatar.payload.get("cid") 171 + query_vector = avatar.vector 172 + else: 173 + pts = query.split("/") 174 + 175 + if len(pts) != 8: 176 + console.print("[red]Invalid avatar URL provided[/red]") 177 + sys.exit(1) 178 + 179 + did = pts[6] 180 + cid = pts[7].split("@")[0] 181 + 182 + resp = RETINA_CLIENT.get_image_hash(did, cid) 183 + 184 + if resp.quality_too_low or resp.hash is None: 185 + console.print("[red]Hash quality too low[/red]") 186 + 
sys.exit(1) 187 + 188 + query_vector = binary_to_float_vector(hex_to_binary(resp.hash)) 189 + 190 + console.print("[cyan]Looking up similar avatars...[/cyan]") 191 + 192 + results = QDRANT_SERVICE.search_similar( 193 + collection_name=CONFIG.qdrant_avatar_collection_name, 194 + query_vector=query_vector, 195 + limit=limit, 196 + score_threshold=threshold, 197 + ) 198 + 199 + display_results(type, cid, results, show_more) 200 + elif type == "post": 201 + if not query: 202 + console.print("[red]Must supply input for post search[/red]") 203 + sys.exit(1) 204 + else: 205 + EMBEDDING_SERVICE.initialize() 206 + 207 + description = query 208 + query_vector = EMBEDDING_SERVICE.encode(query) 209 + 210 + console.print("[cyan]Looking up similar posts...[/cyan]") 211 + 212 + results = QDRANT_SERVICE.search_similar( 213 + collection_name=CONFIG.qdrant_post_collection_name, 148 214 query_vector=query_vector, 149 215 limit=limit, 150 216 score_threshold=threshold,