+108
-77
database.py
+108
-77
database.py
···
1
from dataclasses import dataclass
2
import logging
3
from typing import List, Optional
4
import uuid
5
···
70
71
if not profile_coll_exists:
72
logger.info(f"Creating profile collection: {self.profile_collection_name}")
73
-
self._client.create_collection(
74
-
collection_name=self.profile_collection_name,
75
-
vectors_config=VectorParams(size=1024, distance=Distance.COSINE),
76
-
hnsw_config=HnswConfigDiff(m=32, ef_construct=200),
77
-
quantization_config=ScalarQuantization(
78
-
scalar=ScalarQuantizationConfig(
79
-
type=ScalarType.INT8, quantile=0.99, always_ram=True
80
-
)
81
-
),
82
-
)
83
-
self._client.create_payload_index(
84
-
collection_name=self.profile_collection_name,
85
-
field_name="did",
86
-
field_schema=PayloadSchemaType.KEYWORD,
87
-
)
88
-
self._client.create_payload_index(
89
-
collection_name=self.avatar_collection_name,
90
-
field_name="timestamp",
91
-
field_schema=PayloadSchemaType.DATETIME,
92
-
)
93
logger.info("Collection created successfully")
94
95
if not avatar_coll_exists:
96
logger.info(f"Creating avatar collection: {self.avatar_collection_name}")
97
-
self._client.create_collection(
98
-
collection_name=self.avatar_collection_name,
99
-
vectors_config=VectorParams(
100
-
# PDQ vectors have a size of 256
101
-
size=256,
102
-
# Qdrant doesn't support Hamming distance, so we'll use Euclidean distance and
103
-
# use the square root of the selected max distance for lookups
104
-
distance=Distance.EUCLID,
105
-
),
106
-
hnsw_config=HnswConfigDiff(
107
-
m=16, # lower m for binary-like data
108
-
ef_construct=100,
109
-
),
110
-
quantization_config=BinaryQuantization(
111
-
binary=BinaryQuantizationConfig(always_ram=True)
112
-
),
113
-
)
114
-
self._client.create_payload_index(
115
-
collection_name=self.avatar_collection_name,
116
-
field_name="did",
117
-
field_schema=PayloadSchemaType.KEYWORD,
118
-
)
119
-
self._client.create_payload_index(
120
-
collection_name=self.avatar_collection_name,
121
-
field_name="timestamp",
122
-
field_schema=PayloadSchemaType.DATETIME,
123
-
)
124
125
if not post_coll_exists:
126
logger.info(f"Creating post collection: {self.post_collection_name}")
127
-
self._client.create_collection(
128
-
collection_name=self.post_collection_name,
129
-
vectors_config=VectorParams(
130
-
size=CONFIG.embedding_size,
131
-
distance=Distance.COSINE,
132
-
),
133
-
hnsw_config=HnswConfigDiff(
134
-
m=48,
135
-
ef_construct=256,
136
-
),
137
-
quantization_config=ScalarQuantization(
138
-
scalar=ScalarQuantizationConfig(
139
-
type=ScalarType.INT8,
140
-
quantile=0.99,
141
-
always_ram=True,
142
),
143
-
),
144
-
optimizers_config=OptimizersConfigDiff(
145
-
indexing_threshold=50_000,
146
-
),
147
-
)
148
-
self._client.create_payload_index(
149
-
collection_name=self.post_collection_name,
150
-
field_name="uri",
151
-
field_schema=PayloadSchemaType.KEYWORD,
152
-
)
153
-
self._client.create_payload_index(
154
-
collection_name=self.avatar_collection_name,
155
-
field_name="timestamp",
156
-
field_schema=PayloadSchemaType.DATETIME,
157
-
)
158
logger.info("Collection created successfully")
159
160
def upsert_profile(self, did: str, description: str, vector: List[float]):
···
1
from dataclasses import dataclass
2
import logging
3
+
import sys
4
from typing import List, Optional
5
import uuid
6
···
71
72
if not profile_coll_exists:
73
logger.info(f"Creating profile collection: {self.profile_collection_name}")
74
+
try:
75
+
self._client.create_collection(
76
+
collection_name=self.profile_collection_name,
77
+
vectors_config=VectorParams(size=1024, distance=Distance.COSINE),
78
+
hnsw_config=HnswConfigDiff(m=32, ef_construct=200),
79
+
quantization_config=ScalarQuantization(
80
+
scalar=ScalarQuantizationConfig(
81
+
type=ScalarType.INT8, quantile=0.99, always_ram=True
82
+
)
83
+
),
84
+
)
85
+
except Exception as e:
86
+
logger.error(f"Failed to create profiles collection: {e}")
87
+
sys.exit(1)
88
+
89
+
try:
90
+
self._client.create_payload_index(
91
+
collection_name=self.profile_collection_name,
92
+
field_name="did",
93
+
field_schema=PayloadSchemaType.KEYWORD,
94
+
)
95
+
self._client.create_payload_index(
96
+
collection_name=self.avatar_collection_name,
97
+
field_name="timestamp",
98
+
field_schema=PayloadSchemaType.DATETIME,
99
+
)
100
+
except Exception as e:
101
+
logger.error(f"Failed to create profiles indexes: {e}")
102
+
sys.exit(1)
103
+
104
logger.info("Collection created successfully")
105
106
if not avatar_coll_exists:
107
logger.info(f"Creating avatar collection: {self.avatar_collection_name}")
108
+
109
+
try:
110
+
self._client.create_collection(
111
+
collection_name=self.avatar_collection_name,
112
+
vectors_config=VectorParams(
113
+
# PDQ vectors have a size of 256
114
+
size=256,
115
+
# Qdrant doesn't support Hamming distance, so we'll use Euclidean distance and
116
+
# use the square root of the selected max distance for lookups
117
+
distance=Distance.EUCLID,
118
+
),
119
+
hnsw_config=HnswConfigDiff(
120
+
m=16, # lower m for binary-like data
121
+
ef_construct=100,
122
+
),
123
+
quantization_config=BinaryQuantization(
124
+
binary=BinaryQuantizationConfig(always_ram=True)
125
+
),
126
+
)
127
+
except Exception as e:
128
+
logger.error(f"Failed to create avatar collection: {e}")
129
+
sys.exit(1)
130
+
131
+
try:
132
+
self._client.create_payload_index(
133
+
collection_name=self.avatar_collection_name,
134
+
field_name="did",
135
+
field_schema=PayloadSchemaType.KEYWORD,
136
+
)
137
+
self._client.create_payload_index(
138
+
collection_name=self.avatar_collection_name,
139
+
field_name="timestamp",
140
+
field_schema=PayloadSchemaType.DATETIME,
141
+
)
142
+
except Exception as e:
143
+
logger.error(f"Failed to create avatar indexes: {e}")
144
+
sys.exit(1)
145
146
if not post_coll_exists:
147
logger.info(f"Creating post collection: {self.post_collection_name}")
148
+
try:
149
+
self._client.create_collection(
150
+
collection_name=self.post_collection_name,
151
+
vectors_config=VectorParams(
152
+
size=CONFIG.embedding_size,
153
+
distance=Distance.COSINE,
154
),
155
+
hnsw_config=HnswConfigDiff(
156
+
m=48,
157
+
ef_construct=256,
158
+
),
159
+
quantization_config=ScalarQuantization(
160
+
scalar=ScalarQuantizationConfig(
161
+
type=ScalarType.INT8,
162
+
quantile=0.99,
163
+
always_ram=True,
164
+
),
165
+
),
166
+
optimizers_config=OptimizersConfigDiff(
167
+
indexing_threshold=50_000,
168
+
),
169
+
)
170
+
except Exception as e:
171
+
logger.error(f"Failed to create posts collection: {e}")
172
+
sys.exit(1)
173
+
174
+
try:
175
+
self._client.create_payload_index(
176
+
collection_name=self.post_collection_name,
177
+
field_name="uri",
178
+
field_schema=PayloadSchemaType.KEYWORD,
179
+
)
180
+
self._client.create_payload_index(
181
+
collection_name=self.avatar_collection_name,
182
+
field_name="timestamp",
183
+
field_schema=PayloadSchemaType.DATETIME,
184
+
)
185
+
except Exception as e:
186
+
logger.error(f"Failed to create post indexes: {e}")
187
+
sys.exit(1)
188
+
189
logger.info("Collection created successfully")
190
191
def upsert_profile(self, did: str, description: str, vector: List[float]):