search for standard sites
pub-search.waow.tech
search
zig
blog
atproto
1#!/usr/bin/env -S uv run --script --quiet
2# /// script
3# requires-python = ">=3.12"
4# dependencies = ["httpx", "pydantic-settings"]
5# ///
6"""Check vector index and embeddings status."""
7
8import os
9import httpx
10from pydantic_settings import BaseSettings, SettingsConfigDict
11
12
class Settings(BaseSettings):
    """Turso connection settings loaded from the environment or a dotenv file.

    The dotenv path is taken from the ``ENV_FILE`` environment variable and
    defaults to ``.env``. ``extra="ignore"`` lets the file contain keys that
    are not declared here.
    """

    model_config = SettingsConfigDict(
        env_file=os.environ.get("ENV_FILE", ".env"),
        extra="ignore",
    )

    # Database URL as configured; may start with a ``libsql://`` scheme.
    turso_url: str
    # API token for the database.
    turso_token: str

    @property
    def turso_host(self) -> str:
        """Return ``turso_url`` with a leading ``libsql://`` stripped, if any."""
        return self.turso_url.removeprefix("libsql://")
26
27
def query(settings, sql):
    """Execute one SQL statement via the database's HTTP pipeline endpoint.

    Sends an ``execute`` request for *sql* followed by a ``close`` request to
    ``https://<settings.turso_host>/v2/pipeline``, authenticated with
    ``settings.turso_token`` as a Bearer token, and returns the decoded JSON
    response body.

    ``raise_for_status()`` turns an HTTP error status into an exception; the
    request is given a 30 second timeout.
    """
    endpoint = f"https://{settings.turso_host}/v2/pipeline"
    auth_headers = {
        "Authorization": f"Bearer {settings.turso_token}",
        "Content-Type": "application/json",
    }
    pipeline = {
        "requests": [
            {"type": "execute", "stmt": {"sql": sql}},
            {"type": "close"},
        ]
    }

    reply = httpx.post(endpoint, headers=auth_headers, json=pipeline, timeout=30)
    reply.raise_for_status()
    return reply.json()
45
46
def _cell_value(cell):
    """Return the plain value carried by one result cell.

    A cell is either a bare value or a ``{"type": ..., "value": ...}`` object.
    A NULL cell is encoded as ``{"type": "null"}`` with no ``"value"`` key, so
    the value is read with ``.get`` and NULL comes back as ``None`` instead of
    raising ``KeyError``.
    """
    if isinstance(cell, dict):
        return cell.get("value")
    return cell


settings = Settings()  # type: ignore

# Check embeddings count
print("Checking embeddings...")
result = query(settings, "SELECT COUNT(*) as total, SUM(CASE WHEN embedding IS NOT NULL THEN 1 ELSE 0 END) as with_embeddings FROM documents")
data = result["results"][0]["response"]["result"]["rows"][0]
total = _cell_value(data[0])
with_emb = _cell_value(data[1])
if with_emb is None:
    # SUM() over zero rows is NULL: an empty table simply has no embeddings.
    with_emb = 0
print(f" Total documents: {total}")
print(f" With embeddings: {with_emb}")

# Check if vector index exists
print("\nChecking for vector index...")
result = query(settings, "SELECT name FROM sqlite_master WHERE type='table' AND name LIKE '%embedding%'")
rows = result["results"][0]["response"]["result"]["rows"]
for row in rows:
    name = _cell_value(row[0])
    print(f" Found table: {name}")

# Try to use the index directly
print("\nTrying vector search...")
try:
    # Uses the first stored embedding as the probe vector, so this exercises
    # the index without needing an embedding model.
    result = query(settings, """
 SELECT d.uri, d.title
 FROM vector_top_k('documents_embedding_idx',
 (SELECT embedding FROM documents LIMIT 1), 3) AS v
 JOIN documents d ON d.rowid = v.id
 LIMIT 3
 """)
    print(f" Result: {result['results'][0]}")
except Exception as e:
    # Diagnostic script: report the failure and finish rather than crash.
    print(f" Error: {e}")