import logging
from time import time
from typing import List, Optional

import torch
from sentence_transformers import SentenceTransformer

from config import CONFIG
from metrics import prom_metrics

logger = logging.getLogger(__name__)


class EmbeddingService:
    """Wraps a SentenceTransformer model and records Prometheus metrics per call."""

    def __init__(self) -> None:
        self.model: Optional[SentenceTransformer] = None

    def initialized(self) -> bool:
        return self.model is not None

    def initialize(self) -> None:
        device = CONFIG.embedding_device
        if device == "cuda" and not torch.cuda.is_available():
            device = "cpu"
            logger.warning("CUDA requested but not available, falling back to CPU")
        logger.info("Using CUDA" if device == "cuda" else "Using CPU")
        self.model = SentenceTransformer(CONFIG.embedding_model, device=device)

    def encode(self, text: str) -> List[float]:
        # Blank input: return a zero vector of the configured dimensionality.
        if not text or not text.strip():
            return [0.0] * CONFIG.embedding_size
        if self.model is None:
            raise RuntimeError("EmbeddingService.encode() called before initialize()")
        status = "error"
        start_time = time()
        try:
            vector = self.model.encode(text, convert_to_numpy=True)
            status = "ok"
            return vector.tolist()
        except Exception as e:
            logger.error(f"Error getting embedding: {e}")
            raise
        finally:
            # Record call count and latency regardless of outcome.
            prom_metrics.embedding_performed.labels(status=status).inc()
            prom_metrics.embedding_duration.labels(status=status).observe(
                time() - start_time
            )


EMBEDDING_SERVICE = EmbeddingService()
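

# Minimal usage sketch, assuming this module is saved as embedding_service.py
# (hypothetical name) and that config.py provides CONFIG.embedding_model,
# CONFIG.embedding_device, and CONFIG.embedding_size as referenced above:
#
#     from embedding_service import EMBEDDING_SERVICE
#
#     EMBEDDING_SERVICE.initialize()
#     vector = EMBEDDING_SERVICE.encode("hello world")
#     print(len(vector))  # dimensionality of the configured model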