-- Raw records from firehose/jetstream
-- Core table for all AT Protocol records before denormalization
--
-- Append-only log using plain MergeTree - all versions preserved for audit/rollback.
-- Query-time deduplication via ORDER BY + LIMIT or window functions.
-- JSON column stores full record, extract fields only when needed for ORDER BY/WHERE/JOINs
6778CREATE TABLE IF NOT EXISTS raw_records (
···1011 collection LowCardinality(String),
1112 rkey String,
12131313- -- Content identifier from the record
1414+ -- Content identifier from the record (content-addressed hash)
1415 cid String,
15161616- -- Repository revision (TID) - monotonically increasing per DID, used for dedup/ordering
1717+ -- Repository revision (TID) - monotonically increasing per DID, used for ordering
1718 rev String,
18191920 -- Full record as native JSON (schema-flexible, queryable with record.field.subfield)
2021 record JSON,
21222222- -- Operation: 'create', 'update', 'delete'
2323+ -- Operation: 'create', 'update', 'delete', 'cache' (fetched on-demand)
2324 operation LowCardinality(String),
24252526 -- Firehose sequence number (metadata only, not for ordering - can jump on relay restart)
···4950 SELECT * ORDER BY (did, cid)
5051 )
5152)
5252-ENGINE = ReplacingMergeTree(indexed_at)
5353-ORDER BY (collection, did, rkey, event_time, indexed_at)
5454-SETTINGS deduplicate_merge_projection_mode = 'drop';
5353+ENGINE = MergeTree()
5454+ORDER BY (collection, did, rkey, event_time, indexed_at);
-- Auto-populate handle_mappings from identity events when handle is present
-- NOTE: a ClickHouse MV fires only on new inserts into raw_identity_events;
-- rows already present in the source table are not backfilled.

CREATE MATERIALIZED VIEW IF NOT EXISTS handle_mappings_from_identity_mv TO handle_mappings AS
SELECT
    handle,
    did,
    -- Identity event means the handle is currently bound to this DID
    0 as freed,
    'active' as account_status,
    -- Provenance marker so rows from this MV can be told apart from account-event rows
    'identity' as source,
    event_time,
    -- Evaluated at insert time, when the MV runs
    now64(3) as indexed_at
FROM raw_identity_events
WHERE handle != ''
-- Auto-populate freed status from account events
-- JOINs against handle_mappings to find current handle for the DID
-- If no mapping exists yet, the JOIN fails silently (can't free unknown handles)
-- NOTE(review): the MV triggers only on inserts into raw_account_events (the
-- left-most table) and reads handle_mappings as of insert time. If a DID has
-- several non-freed handle rows, one freed row per match is emitted - confirm
-- handle_mappings is deduplicated enough for this to be acceptable.

CREATE MATERIALIZED VIEW IF NOT EXISTS handle_mappings_from_account_mv AS
SELECT
    h.handle,
    a.did,
    -- Account deactivation frees the handle
    1 as freed,
    a.status as account_status,
    'account' as source,
    a.event_time,
    now64(3) as indexed_at
FROM raw_account_events a
INNER JOIN handle_mappings h ON h.did = a.did AND h.freed = 0
WHERE a.active = 0 AND a.status != ''
-- Profile counts aggregated from graph tables
-- Updated by MVs from follows, notebooks, entries (added later with those tables)
-- Joined with profiles at query time
-- NOTE: SummingMergeTree only collapses rows at merge time; read with
-- sum(<count>) ... GROUP BY did (or FINAL) to get exact totals.

CREATE TABLE IF NOT EXISTS profile_counts (
    did String,

    -- Signed for increment/decrement from MVs (deletes insert negative deltas)
    follower_count Int64 DEFAULT 0,
    following_count Int64 DEFAULT 0,
    notebook_count Int64 DEFAULT 0,
    entry_count Int64 DEFAULT 0,

    -- Not in the summing column list, so merges keep an arbitrary row's value
    indexed_at DateTime64(3) DEFAULT now64(3)
)
ENGINE = SummingMergeTree((follower_count, following_count, notebook_count, entry_count))
ORDER BY did
-- Notebook engagement counts
-- Updated by MVs from likes, bookmarks, subscriptions (added later with graph tables)
-- Joined with notebooks at query time
-- NOTE: SummingMergeTree only collapses rows at merge time; read with
-- sum(<count>) ... GROUP BY did, rkey (or FINAL) to get exact totals.

CREATE TABLE IF NOT EXISTS notebook_counts (
    did String,
    rkey String,

    -- Signed for increment/decrement from MVs (deletes insert negative deltas)
    like_count Int64 DEFAULT 0,
    bookmark_count Int64 DEFAULT 0,
    subscriber_count Int64 DEFAULT 0,

    -- Not in the summing column list, so merges keep an arbitrary row's value
    indexed_at DateTime64(3) DEFAULT now64(3)
)
ENGINE = SummingMergeTree((like_count, bookmark_count, subscriber_count))
ORDER BY (did, rkey)
-- Entry engagement counts
-- Updated by MVs from likes, bookmarks (added later with graph tables)
-- Joined with entries at query time
-- NOTE: SummingMergeTree only collapses rows at merge time; read with
-- sum(<count>) ... GROUP BY did, rkey (or FINAL) to get exact totals.

CREATE TABLE IF NOT EXISTS entry_counts (
    did String,
    rkey String,

    -- Signed for increment/decrement from MVs (deletes insert negative deltas)
    like_count Int64 DEFAULT 0,
    bookmark_count Int64 DEFAULT 0,

    -- Not in the summing column list, so merges keep an arbitrary row's value
    indexed_at DateTime64(3) DEFAULT now64(3)
)
ENGINE = SummingMergeTree((like_count, bookmark_count))
ORDER BY (did, rkey)
-- Edit heads per resource
-- Refreshable MV that tracks all branch heads for each resource
-- A head is a node with no children (nothing has prev pointing to it)
-- Multiple heads = divergent branches needing merge
-- NOTE: refreshed on a schedule, so contents can lag edit_nodes by up to a
-- minute; ReplacingMergeTree(indexed_at) keeps the latest refresh per key.

CREATE MATERIALIZED VIEW IF NOT EXISTS edit_heads
REFRESH EVERY 1 MINUTE
ENGINE = ReplacingMergeTree(indexed_at)
ORDER BY (resource_did, resource_collection, resource_rkey, head_did, head_rkey)
AS
WITH
    -- All nodes attached to a resource (others cannot be heads of anything here)
    all_nodes AS (
        SELECT
            did, rkey, cid, collection, node_type,
            resource_did, resource_collection, resource_rkey,
            root_did, root_rkey,
            prev_did, prev_rkey,
            created_at
        FROM edit_nodes
        WHERE resource_did != ''
    ),
    -- Nodes that are pointed to by prev (have children)
    has_children AS (
        SELECT DISTINCT prev_did as did, prev_rkey as rkey
        FROM all_nodes
        WHERE prev_did != ''
    ),
    -- Root CIDs lookup
    -- NOTE(review): if edit_nodes can hold several versions of the same root
    -- (did, rkey), this CTE has duplicates and the LEFT JOIN below fans out
    -- rows - confirm edit_nodes is deduplicated upstream.
    root_cids AS (
        SELECT did, rkey, cid
        FROM edit_nodes
        WHERE node_type = 'root'
    )
-- Heads are nodes with no children
SELECT
    n.resource_did,
    n.resource_collection,
    n.resource_rkey,
    concat('at://', n.resource_did, '/', n.resource_collection, '/', n.resource_rkey) as resource_uri,

    -- This head
    n.did as head_did,
    n.rkey as head_rkey,
    n.cid as head_cid,
    n.collection as head_collection,
    n.node_type as head_type,
    concat('at://', n.did, '/', n.collection, '/', n.rkey) as head_uri,

    -- Root for this branch (with CID); a root node is its own root
    if(n.node_type = 'root', n.did, n.root_did) as root_did,
    if(n.node_type = 'root', n.rkey, n.root_rkey) as root_rkey,
    -- coalesce covers heads whose root row has no match in root_cids
    if(n.node_type = 'root', n.cid, coalesce(r.cid, '')) as root_cid,
    if(n.node_type = 'root',
        concat('at://', n.did, '/', n.collection, '/', n.rkey),
        if(n.root_did != '', concat('at://', n.root_did, '/sh.weaver.edit.root/', n.root_rkey), '')
    ) as root_uri,

    n.created_at as head_created_at,
    now64(3) as indexed_at
FROM all_nodes n
-- ANTI JOIN keeps only nodes that no other node's prev points to
LEFT ANTI JOIN has_children c ON n.did = c.did AND n.rkey = c.rkey
LEFT JOIN root_cids r ON r.did = n.root_did AND r.rkey = n.root_rkey
-- Resource permissions
-- Refreshable MV that computes who can access each resource
-- Combines: owners (resource creator) + collaborators (invite+accept pairs)
-- NOTE: refreshed on a schedule, so grants can lag the source tables by up to
-- a minute; ReplacingMergeTree(indexed_at) keeps the latest refresh per key.

CREATE MATERIALIZED VIEW IF NOT EXISTS permissions
REFRESH EVERY 1 MINUTE
ENGINE = ReplacingMergeTree(indexed_at)
ORDER BY (resource_did, resource_collection, resource_rkey, grantee_did)
AS
-- Owners: resource creator has owner permission (entries)
SELECT
    did as resource_did,
    'sh.weaver.notebook.entry' as resource_collection,
    rkey as resource_rkey,
    concat('at://', did, '/sh.weaver.notebook.entry/', rkey) as resource_uri,

    -- Creator grants themselves ownership
    did as grantee_did,
    'owner' as scope,

    -- Source is the resource itself
    did as source_did,
    'sh.weaver.notebook.entry' as source_collection,
    rkey as source_rkey,

    created_at as granted_at,
    now64(3) as indexed_at
FROM entries

UNION ALL

-- Owners: resource creator has owner permission (notebooks)
SELECT
    did as resource_did,
    'sh.weaver.notebook.book' as resource_collection,
    rkey as resource_rkey,
    concat('at://', did, '/sh.weaver.notebook.book/', rkey) as resource_uri,

    did as grantee_did,
    'owner' as scope,

    did as source_did,
    'sh.weaver.notebook.book' as source_collection,
    rkey as source_rkey,

    created_at as granted_at,
    now64(3) as indexed_at
FROM notebooks

UNION ALL

-- Collaborators: invite+accept pairs grant permission
SELECT
    resource_did,
    resource_collection,
    resource_rkey,
    resource_uri,

    collaborator_did as grantee_did,
    -- Empty scope on the invite falls back to the default collaborator role
    if(scope != '', scope, 'collaborator') as scope,

    -- Source is the invite record that granted access
    invite_did as source_did,
    'sh.weaver.collab.invite' as source_collection,
    invite_rkey as source_rkey,

    accepted_at as granted_at,
    indexed_at
FROM collaborators
+10-9
crates/weaver-index/src/bin/weaver_indexer.rs
···11use clap::{Parser, Subcommand};
22use tracing::{error, info, warn};
33-use weaver_index::clickhouse::{Client, Migrator, Tables};
33+use weaver_index::clickhouse::{Client, Migrator};
44use weaver_index::config::{
55 ClickHouseConfig, FirehoseConfig, IndexerConfig, ShardConfig, SourceMode, TapConfig,
66};
···7171 let client = Client::new(&config)?;
72727373 if reset {
7474+ let objects = Migrator::all_objects();
7475 if dry_run {
7575- info!("Would drop tables:");
7676- for table in Tables::ALL {
7777- info!(" - {}", table);
7676+ info!("Would drop {} objects:", objects.len());
7777+ for obj in &objects {
7878+ info!(" - {} ({:?})", obj.name, obj.object_type);
7879 }
7980 } else {
8080- info!("Dropping all tables...");
8181- for table in Tables::ALL {
8282- let query = format!("DROP TABLE IF EXISTS {}", table);
8181+ info!("Dropping all tables and views...");
8282+ for obj in &objects {
8383+ let query = obj.drop_statement();
8384 match client.execute(&query).await {
8484- Ok(_) => info!(" dropped {}", table),
8585- Err(e) => warn!(" failed to drop {}: {}", table, e),
8585+ Ok(_) => info!(" dropped {} ({:?})", obj.name, obj.object_type),
8686+ Err(e) => warn!(" failed to drop {}: {}", obj.name, e),
8687 }
8788 }
8889 }
+1-1
crates/weaver-index/src/clickhouse.rs
···33mod schema;
4455pub use client::{Client, TableSize};
66-pub use migrations::{MigrationResult, Migrator};
66+pub use migrations::{DbObject, MigrationResult, Migrator, ObjectType};
77pub use schema::{
88 AccountRevState, FirehoseCursor, RawAccountEvent, RawEventDlq, RawIdentityEvent,
99 RawRecordInsert, Tables,
+20-12
crates/weaver-index/src/clickhouse/client.rs
···106106 collection: &str,
107107 rkey: &str,
108108 ) -> Result<Option<RecordRow>, IndexError> {
109109- // FINAL ensures ReplacingMergeTree deduplication is applied
110109 // Order by event_time first (firehose data wins), then indexed_at as tiebreaker
111110 // Include deletes so we can return not-found for deleted records
112111 let query = r#"
113112 SELECT cid, record, operation
114114- FROM raw_records FINAL
113113+ FROM raw_records
115114 WHERE did = ?
116115 AND collection = ?
117116 AND rkey = ?
···191190 /// List records for a repo+collection
192191 ///
193192 /// Returns non-deleted records ordered by rkey, with cursor-based pagination.
193193+ /// Uses window function to get latest operation per rkey and filter out deletes.
194194 pub async fn list_records(
195195 &self,
196196 did: &str,
···202202 let order = if reverse { "DESC" } else { "ASC" };
203203 let cursor_op = if reverse { "<" } else { ">" };
204204205205- // Build query with optional cursor
205205+ // Use window function to get latest version per rkey, then filter out deletes
206206 let query = if cursor.is_some() {
207207 format!(
208208 r#"
209209 SELECT rkey, cid, record
210210- FROM raw_records FINAL
211211- WHERE did = ?
212212- AND collection = ?
213213- AND rkey {cursor_op} ?
214214- AND operation != 'delete'
210210+ FROM (
211211+ SELECT rkey, cid, record, operation,
212212+ ROW_NUMBER() OVER (PARTITION BY rkey ORDER BY event_time DESC, indexed_at DESC) as rn
213213+ FROM raw_records
214214+ WHERE did = ?
215215+ AND collection = ?
216216+ AND rkey {cursor_op} ?
217217+ )
218218+ WHERE rn = 1 AND operation != 'delete'
215219 ORDER BY rkey {order}
216220 LIMIT ?
217221 "#,
···220224 format!(
221225 r#"
222226 SELECT rkey, cid, record
223223- FROM raw_records FINAL
224224- WHERE did = ?
225225- AND collection = ?
226226- AND operation != 'delete'
227227+ FROM (
228228+ SELECT rkey, cid, record, operation,
229229+ ROW_NUMBER() OVER (PARTITION BY rkey ORDER BY event_time DESC, indexed_at DESC) as rn
230230+ FROM raw_records
231231+ WHERE did = ?
232232+ AND collection = ?
233233+ )
234234+ WHERE rn = 1 AND operation != 'delete'
227235 ORDER BY rkey {order}
228236 LIMIT ?
229237 "#,
+87-1
crates/weaver-index/src/clickhouse/migrations.rs
···11use crate::error::{ClickHouseError, IndexError};
22use include_dir::{Dir, include_dir};
33+use regex::Regex;
34use tracing::info;
4556use super::Client;
···78/// Embedded migrations directory - compiled into the binary
89static MIGRATIONS_DIR: Dir = include_dir!("$CARGO_MANIFEST_DIR/migrations/clickhouse");
/// Kind of database object a migration can create.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ObjectType {
    Table,
    MaterializedView,
    View,
}

/// A database object (table or view) extracted from migrations.
#[derive(Debug, Clone)]
pub struct DbObject {
    pub name: String,
    pub object_type: ObjectType,
}

impl DbObject {
    /// Build the DROP statement for this object.
    ///
    /// Tables are dropped with `DROP TABLE`; both plain and materialized
    /// views are dropped with `DROP VIEW`.
    pub fn drop_statement(&self) -> String {
        let keyword = match self.object_type {
            ObjectType::Table => "TABLE",
            ObjectType::MaterializedView | ObjectType::View => "VIEW",
        };
        format!("DROP {} IF EXISTS {}", keyword, self.name)
    }
}
3737+1038/// Migration runner for ClickHouse
1139pub struct Migrator<'a> {
1240 client: &'a Client,
···3260 files
3361 }
34626363+ /// Extract all database objects (tables, views) from migrations
6464+ /// Returns them in reverse order for safe dropping (MVs before their source tables)
6565+ pub fn all_objects() -> Vec<DbObject> {
6666+ let table_re =
6767+ Regex::new(r"(?i)CREATE\s+TABLE\s+IF\s+NOT\s+EXISTS\s+(\w+)").unwrap();
6868+ let mv_re =
6969+ Regex::new(r"(?i)CREATE\s+MATERIALIZED\s+VIEW\s+IF\s+NOT\s+EXISTS\s+(\w+)").unwrap();
7070+ let view_re =
7171+ Regex::new(r"(?i)CREATE\s+VIEW\s+IF\s+NOT\s+EXISTS\s+(\w+)").unwrap();
7272+7373+ let mut objects = Vec::new();
7474+7575+ for (_, sql) in Self::migrations() {
7676+ // Find all materialized views
7777+ for caps in mv_re.captures_iter(sql) {
7878+ objects.push(DbObject {
7979+ name: caps[1].to_string(),
8080+ object_type: ObjectType::MaterializedView,
8181+ });
8282+ }
8383+ // Find all regular views (excluding MVs already matched)
8484+ for caps in view_re.captures_iter(sql) {
8585+ let name = caps[1].to_string();
8686+ // Skip if already added as MV
8787+ if !objects.iter().any(|o| o.name == name) {
8888+ objects.push(DbObject {
8989+ name,
9090+ object_type: ObjectType::View,
9191+ });
9292+ }
9393+ }
9494+ // Find all tables
9595+ for caps in table_re.captures_iter(sql) {
9696+ objects.push(DbObject {
9797+ name: caps[1].to_string(),
9898+ object_type: ObjectType::Table,
9999+ });
100100+ }
101101+ }
102102+103103+ // Reverse so MVs/views come before their source tables
104104+ objects.reverse();
105105+ objects
106106+ }
107107+35108 /// Run all pending migrations
36109 pub async fn run(&self) -> Result<MigrationResult, IndexError> {
37110 // First, ensure the migrations table exists (bootstrap)
···57130 }
5813159132 info!(migration = %name, "applying migration");
6060- self.client.execute(sql).await?;
133133+134134+ // Split by semicolons and execute each statement
135135+ for statement in Self::split_statements(sql) {
136136+ self.client.execute(statement).await?;
137137+ }
138138+61139 self.record_migration(name).await?;
62140 applied_count += 1;
63141 }
···66144 applied: applied_count,
67145 skipped: skipped_count,
68146 })
147147+ }
148148+149149+ /// Split SQL into individual statements
150150+ fn split_statements(sql: &str) -> Vec<&str> {
151151+ sql.split(';')
152152+ .map(|s| s.trim())
153153+ .filter(|s| !s.is_empty())
154154+ .collect()
69155 }
7015671157 /// Check which migrations would be applied without running them