A small service that creates embeddings of posts, profiles, and avatars and stores them in Qdrant

try/except

Changed files
+108 -77
+108 -77
database.py
··· 1 1 from dataclasses import dataclass 2 2 import logging 3 + import sys 3 4 from typing import List, Optional 4 5 import uuid 5 6 ··· 70 71 71 72 if not profile_coll_exists: 72 73 logger.info(f"Creating profile collection: {self.profile_collection_name}") 73 - self._client.create_collection( 74 - collection_name=self.profile_collection_name, 75 - vectors_config=VectorParams(size=1024, distance=Distance.COSINE), 76 - hnsw_config=HnswConfigDiff(m=32, ef_construct=200), 77 - quantization_config=ScalarQuantization( 78 - scalar=ScalarQuantizationConfig( 79 - type=ScalarType.INT8, quantile=0.99, always_ram=True 80 - ) 81 - ), 82 - ) 83 - self._client.create_payload_index( 84 - collection_name=self.profile_collection_name, 85 - field_name="did", 86 - field_schema=PayloadSchemaType.KEYWORD, 87 - ) 88 - self._client.create_payload_index( 89 - collection_name=self.avatar_collection_name, 90 - field_name="timestamp", 91 - field_schema=PayloadSchemaType.DATETIME, 92 - ) 74 + try: 75 + self._client.create_collection( 76 + collection_name=self.profile_collection_name, 77 + vectors_config=VectorParams(size=1024, distance=Distance.COSINE), 78 + hnsw_config=HnswConfigDiff(m=32, ef_construct=200), 79 + quantization_config=ScalarQuantization( 80 + scalar=ScalarQuantizationConfig( 81 + type=ScalarType.INT8, quantile=0.99, always_ram=True 82 + ) 83 + ), 84 + ) 85 + except Exception as e: 86 + logger.error(f"Failed to create profiles collection: {e}") 87 + sys.exit(1) 88 + 89 + try: 90 + self._client.create_payload_index( 91 + collection_name=self.profile_collection_name, 92 + field_name="did", 93 + field_schema=PayloadSchemaType.KEYWORD, 94 + ) 95 + self._client.create_payload_index( 96 + collection_name=self.avatar_collection_name, 97 + field_name="timestamp", 98 + field_schema=PayloadSchemaType.DATETIME, 99 + ) 100 + except Exception as e: 101 + logger.error(f"Failed to create profiles indexes: {e}") 102 + sys.exit(1) 103 + 93 104 logger.info("Collection created successfully") 94 
105 95 106 if not avatar_coll_exists: 96 107 logger.info(f"Creating avatar collection: {self.avatar_collection_name}") 97 - self._client.create_collection( 98 - collection_name=self.avatar_collection_name, 99 - vectors_config=VectorParams( 100 - # PDQ vectors have a size of 256 101 - size=256, 102 - # Qdrant doesn't support hamming distance, so we'll use euclidian distance and 103 - # use the square root of the selected max distance for lookups 104 - distance=Distance.EUCLID, 105 - ), 106 - hnsw_config=HnswConfigDiff( 107 - m=16, # lower m for binary-like data 108 - ef_construct=100, 109 - ), 110 - quantization_config=BinaryQuantization( 111 - binary=BinaryQuantizationConfig(always_ram=True) 112 - ), 113 - ) 114 - self._client.create_payload_index( 115 - collection_name=self.avatar_collection_name, 116 - field_name="did", 117 - field_schema=PayloadSchemaType.KEYWORD, 118 - ) 119 - self._client.create_payload_index( 120 - collection_name=self.avatar_collection_name, 121 - field_name="timestamp", 122 - field_schema=PayloadSchemaType.DATETIME, 123 - ) 108 + 109 + try: 110 + self._client.create_collection( 111 + collection_name=self.avatar_collection_name, 112 + vectors_config=VectorParams( 113 + # PDQ vectors have a size of 256 114 + size=256, 115 + # Qdrant doesn't support hamming distance, so we'll use euclidian distance and 116 + # use the square root of the selected max distance for lookups 117 + distance=Distance.EUCLID, 118 + ), 119 + hnsw_config=HnswConfigDiff( 120 + m=16, # lower m for binary-like data 121 + ef_construct=100, 122 + ), 123 + quantization_config=BinaryQuantization( 124 + binary=BinaryQuantizationConfig(always_ram=True) 125 + ), 126 + ) 127 + except Exception as e: 128 + logger.error(f"Failed to create avatar collection: {e}") 129 + sys.exit(1) 130 + 131 + try: 132 + self._client.create_payload_index( 133 + collection_name=self.avatar_collection_name, 134 + field_name="did", 135 + field_schema=PayloadSchemaType.KEYWORD, 136 + ) 137 + 
self._client.create_payload_index( 138 + collection_name=self.avatar_collection_name, 139 + field_name="timestamp", 140 + field_schema=PayloadSchemaType.DATETIME, 141 + ) 142 + except Exception as e: 143 + logger.error(f"Failed to create avatar indexes: {e}") 144 + sys.exit(1) 124 145 125 146 if not post_coll_exists: 126 147 logger.info(f"Creating post collection: {self.post_collection_name}") 127 - self._client.create_collection( 128 - collection_name=self.post_collection_name, 129 - vectors_config=VectorParams( 130 - size=CONFIG.embedding_size, 131 - distance=Distance.COSINE, 132 - ), 133 - hnsw_config=HnswConfigDiff( 134 - m=48, 135 - ef_construct=256, 136 - ), 137 - quantization_config=ScalarQuantization( 138 - scalar=ScalarQuantizationConfig( 139 - type=ScalarType.INT8, 140 - quantile=0.99, 141 - always_ram=True, 148 + try: 149 + self._client.create_collection( 150 + collection_name=self.post_collection_name, 151 + vectors_config=VectorParams( 152 + size=CONFIG.embedding_size, 153 + distance=Distance.COSINE, 142 154 ), 143 - ), 144 - optimizers_config=OptimizersConfigDiff( 145 - indexing_threshold=50_000, 146 - ), 147 - ) 148 - self._client.create_payload_index( 149 - collection_name=self.post_collection_name, 150 - field_name="uri", 151 - field_schema=PayloadSchemaType.KEYWORD, 152 - ) 153 - self._client.create_payload_index( 154 - collection_name=self.avatar_collection_name, 155 - field_name="timestamp", 156 - field_schema=PayloadSchemaType.DATETIME, 157 - ) 155 + hnsw_config=HnswConfigDiff( 156 + m=48, 157 + ef_construct=256, 158 + ), 159 + quantization_config=ScalarQuantization( 160 + scalar=ScalarQuantizationConfig( 161 + type=ScalarType.INT8, 162 + quantile=0.99, 163 + always_ram=True, 164 + ), 165 + ), 166 + optimizers_config=OptimizersConfigDiff( 167 + indexing_threshold=50_000, 168 + ), 169 + ) 170 + except Exception as e: 171 + logger.error(f"Failed to create posts collection: {e}") 172 + sys.exit(1) 173 + 174 + try: 175 + 
self._client.create_payload_index( 176 + collection_name=self.post_collection_name, 177 + field_name="uri", 178 + field_schema=PayloadSchemaType.KEYWORD, 179 + ) 180 + self._client.create_payload_index( 181 + collection_name=self.avatar_collection_name, 182 + field_name="timestamp", 183 + field_schema=PayloadSchemaType.DATETIME, 184 + ) 185 + except Exception as e: 186 + logger.error(f"Failed to create post indexes: {e}") 187 + sys.exit(1) 188 + 158 189 logger.info("Collection created successfully") 159 190 160 191 def upsert_profile(self, did: str, description: str, vector: List[float]):