search for standard sites pub-search.waow.tech
search zig blog atproto
at main 78 lines 2.4 kB view raw
#!/usr/bin/env -S uv run --script --quiet
# /// script
# requires-python = ">=3.12"
# dependencies = ["httpx", "pydantic-settings"]
# ///
"""Check vector index and embeddings status."""

import os
import httpx
from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """Turso connection settings loaded from the environment or an env file.

    The env file path is taken from the ``ENV_FILE`` environment variable and
    defaults to ``.env``; keys that are not declared here are ignored.
    """

    model_config = SettingsConfigDict(
        env_file=os.environ.get("ENV_FILE", ".env"), extra="ignore"
    )
    turso_url: str
    turso_token: str

    @property
    def turso_host(self) -> str:
        """Return ``turso_url`` with a leading ``libsql://`` scheme removed."""
        return self.turso_url.removeprefix("libsql://")


def query(settings, sql: str):
    """Execute one SQL statement through the database's HTTP pipeline endpoint.

    Args:
        settings: Object exposing ``turso_host`` and ``turso_token``.
        sql: SQL text to execute.

    Returns:
        The decoded JSON body of the pipeline response.

    Raises:
        httpx.HTTPStatusError: If the server replies with an error status.
    """
    response = httpx.post(
        f"https://{settings.turso_host}/v2/pipeline",
        headers={
            "Authorization": f"Bearer {settings.turso_token}",
            "Content-Type": "application/json",
        },
        json={
            "requests": [
                {"type": "execute", "stmt": {"sql": sql}},
                {"type": "close"},
            ]
        },
        timeout=30,
    )
    response.raise_for_status()
    return response.json()


def _cell_value(cell):
    """Return the plain value carried by one result cell.

    A cell is either a bare value or a dict holding the value under
    ``"value"``. A dict without that key yields ``None`` instead of raising
    ``KeyError``; the Hrana protocol encodes SQL NULL as ``{"type": "null"}``
    with no ``"value"`` entry.
    """
    if isinstance(cell, dict):
        return cell.get("value")
    return cell


settings = Settings()  # type: ignore

# Check embeddings count
print("Checking embeddings...")
result = query(
    settings,
    "SELECT COUNT(*) as total, "
    "SUM(CASE WHEN embedding IS NOT NULL THEN 1 ELSE 0 END) as with_embeddings "
    "FROM documents",
)
data = result["results"][0]["response"]["result"]["rows"][0]
total = _cell_value(data[0])
with_emb = _cell_value(data[1])
if with_emb is None:
    # SUM() over an empty table is NULL; report it as zero embeddings.
    with_emb = 0
print(f" Total documents: {total}")
print(f" With embeddings: {with_emb}")

# Check if vector index exists
print("\nChecking for vector index...")
result = query(
    settings,
    "SELECT name FROM sqlite_master WHERE type='table' AND name LIKE '%embedding%'",
)
rows = result["results"][0]["response"]["result"]["rows"]
for row in rows:
    name = _cell_value(row[0])
    print(f" Found table: {name}")

# Try to use the index directly
print("\nTrying vector search...")
# Probe the vector index: use the embedding of an arbitrary document as the
# query vector and ask for its three nearest neighbours. Any failure is
# reported on stdout rather than aborting the script.
vector_probe_sql = """
    SELECT d.uri, d.title
    FROM vector_top_k('documents_embedding_idx',
        (SELECT embedding FROM documents LIMIT 1), 3) AS v
    JOIN documents d ON d.rowid = v.id
    LIMIT 3
"""
try:
    probe_response = query(settings, vector_probe_sql)
    first_result = probe_response["results"][0]
    print(f" Result: {first_result}")
except Exception as exc:
    print(f" Error: {exc}")