···
 pub mod constellation;
 pub mod error;
 pub mod resolve;
+#[cfg(feature = "telemetry")]
+pub mod telemetry;
 pub mod transport;
 pub mod worker_rt;
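
Downstream code can stay agnostic to the feature flag by pairing a gated definition with a no-op fallback. A minimal sketch, assuming a hypothetical `init_metrics` entry point that is not part of this diff:

#[cfg(feature = "telemetry")]
pub fn init_metrics() {
    // Forward to the gated module; `init` here is illustrative only.
    telemetry::init();
}

#[cfg(not(feature = "telemetry"))]
pub fn init_metrics() {
    // Built without the feature: compiles to a no-op.
}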
···
     -- Populated by async batch validation, not in hot path
     validation_state LowCardinality(String) DEFAULT 'unchecked',

+    -- Whether this came from live firehose (true) or backfill (false)
+    -- Backfill events may not reflect current state until repo is fully synced
+    is_live Bool DEFAULT true,
+
     -- Materialized AT URI for convenience
     uri String MATERIALIZED concat('at://', did, '/', collection, '/', rkey),
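
With the new column, consumers can exclude backfill rows until a repo is caught up. A hedged example query, written as a Rust constant; the assumption is that this column lives in `raw_records`, the table targeted by the inserters elsewhere in this diff:

// Hypothetical consumer-side filter: ignore backfill rows, newest first.
const LIVE_ONLY: &str =
    "SELECT uri FROM raw_records WHERE is_live = true ORDER BY seq DESC LIMIT 100";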
+2-8
crates/weaver-index/src/bin/storage_benchmark.rs
···
     let firehose_config = FirehoseConfig::from_env()?;

     info!(
-        "Connecting to ClickHouse at {} (database: {})",
+        "Connecting to ClickHouse at:\n{} (database: {})",
         ch_config.url, ch_config.database
     );
     let client = Client::new(&ch_config)?;
···
         drop_benchmark_tables(&client).await?;
     }

-    // Create tables
     info!("Creating benchmark tables...");
     create_benchmark_tables(&client).await?;

-    // Create inserters
     let mut json_inserter = client.inserter::<RawRecordJson>(TABLE_JSON);
     let mut cbor_inserter = client.inserter::<RawRecordCbor>(TABLE_CBOR);

-    // Connect to firehose
-    info!("Connecting to firehose at {}", firehose_config.relay_url);
+    info!("Connecting to firehose at:\n {}", firehose_config.relay_url);
     let consumer = FirehoseConsumer::new(firehose_config);
     let mut stream = consumer.connect().await?;
···
         }
     }

-    // Final flush
     info!("Flushing remaining records...");
     json_inserter
         .end()
···
         source: e,
     })?;

-    // Final report
     info!("\n========== FINAL RESULTS ==========");
     report_progress(
         &client,
···
         errors
     );

-    // Lag info - critical for detecting if we're falling behind
     if lag.sample_count > 0 {
         info!(
             " Lag: current={:.1}s, min={:.1}s, max={:.1}s (window)",
+132-25
crates/weaver-index/src/bin/weaver_indexer.rs
···
 use clap::{Parser, Subcommand};
-use miette::IntoDiagnostic;
-use tracing::{Level, info, warn};
-use tracing_subscriber::EnvFilter;
+use tracing::{error, info, warn};
 use weaver_index::clickhouse::{Client, Migrator, Tables};
-use weaver_index::config::{ClickHouseConfig, FirehoseConfig, IndexerConfig};
+use weaver_index::config::{
+    ClickHouseConfig, FirehoseConfig, IndexerConfig, ShardConfig, SourceMode, TapConfig,
+};
 use weaver_index::firehose::FirehoseConsumer;
-use weaver_index::{Indexer, load_cursor};
+use weaver_index::server::{AppState, ServerConfig, TelemetryConfig, telemetry};
+use weaver_index::{FirehoseIndexer, TapIndexer, load_cursor};

 #[derive(Parser)]
 #[command(name = "indexer")]
···
     /// Check database connectivity
     Health,

-    /// Start the indexer service (not yet implemented)
+    /// Start the full service (indexer + HTTP server)
     Run,
+
+    /// Start only the HTTP server (no indexing)
+    Serve,
+
+    /// Start only the indexer (no HTTP server)
+    Index,
 }

 #[tokio::main]
 async fn main() -> miette::Result<()> {
     dotenvy::dotenv().ok();

-    let console_level = if cfg!(debug_assertions) {
-        Level::DEBUG
-    } else {
-        Level::INFO
-    };
-
-    tracing_subscriber::fmt()
-        .with_env_filter(
-            tracing_subscriber::EnvFilter::builder()
-                .from_env_lossy()
-                .add_directive(console_level.into())
-                .add_directive("hyper_util=info".parse().into_diagnostic()?),
-        )
-        .init();
+    // Initialize telemetry (metrics + tracing with optional Loki)
+    let telemetry_config = TelemetryConfig::from_env("weaver-index");
+    telemetry::init(telemetry_config).await;

     let args = Args::parse();

     match args.command {
         Command::Migrate { dry_run, reset } => run_migrate(dry_run, reset).await,
         Command::Health => run_health().await,
-        Command::Run => run_indexer().await,
+        Command::Run => run_full().await,
+        Command::Serve => run_server_only().await,
+        Command::Index => run_indexer_only().await,
     }
 }
···
     Ok(())
 }

+/// Run both indexer and HTTP server concurrently (production mode)
+async fn run_full() -> miette::Result<()> {
+    let ch_config = ClickHouseConfig::from_env()?;
+    let shard_config = ShardConfig::from_env();
+    let server_config = ServerConfig::from_env();
+    let indexer_config = IndexerConfig::from_env();
+    let source_mode = SourceMode::from_env();
+
+    info!(
+        "Connecting to ClickHouse at {} (database: {})",
+        ch_config.url, ch_config.database
+    );
+    info!("SQLite shards at {}", shard_config.base_path.display());
+
+    // Create separate clients for indexer and server
+    let indexer_client = Client::new(&ch_config)?;
+    let server_client = Client::new(&ch_config)?;
+
+    // Build AppState for server
+    let state = AppState::new(server_client, shard_config);
+
+    // Spawn the indexer task
+    let indexer_handle = match source_mode {
+        SourceMode::Firehose => {
+            let mut firehose_config = FirehoseConfig::from_env()?;
+            if firehose_config.cursor.is_none() {
+                if let Some(cursor) = load_cursor(&indexer_client).await? {
+                    firehose_config.cursor = Some(cursor);
+                }
+            }
+            info!(
+                "Connecting to firehose at {} (cursor: {:?})",
+                firehose_config.relay_url, firehose_config.cursor
+            );
+            let consumer = FirehoseConsumer::new(firehose_config);
+            let indexer = FirehoseIndexer::new(indexer_client, consumer, indexer_config).await?;
+            info!("Starting firehose indexer");
+            tokio::spawn(async move { indexer.run().await })
+        }
+        SourceMode::Tap => {
+            let tap_config = TapConfig::from_env()?;
+            let indexer = TapIndexer::new(indexer_client, tap_config, indexer_config);
+            info!("Starting tap indexer");
+            tokio::spawn(async move { indexer.run().await })
+        }
+    };
+
+    // Run server, monitoring indexer health
+    tokio::select! {
+        result = weaver_index::server::run(state, server_config) => {
+            result?;
+        }
+        result = indexer_handle => {
+            match result {
+                Ok(Ok(())) => info!("Indexer completed"),
+                Ok(Err(e)) => error!("Indexer failed: {}", e),
+                Err(e) => error!("Indexer task panicked: {}", e),
+            }
+        }
+    }
+
+    Ok(())
+}
+
-async fn run_indexer() -> miette::Result<()> {
+/// Run only the indexer (no HTTP server)
+async fn run_indexer_only() -> miette::Result<()> {
     let ch_config = ClickHouseConfig::from_env()?;
-    let mut firehose_config = FirehoseConfig::from_env()?;
     let indexer_config = IndexerConfig::from_env();
+    let source_mode = SourceMode::from_env();

     info!(
         "Connecting to ClickHouse at {} (database: {})",
···
     );
     let client = Client::new(&ch_config)?;

+    match source_mode {
+        SourceMode::Firehose => run_firehose_indexer(client, indexer_config).await,
+        SourceMode::Tap => {
+            let tap_config = TapConfig::from_env()?;
+            run_tap_indexer(client, tap_config, indexer_config).await
+        }
+    }
+}
+
+async fn run_firehose_indexer(client: Client, indexer_config: IndexerConfig) -> miette::Result<()> {
+    let mut firehose_config = FirehoseConfig::from_env()?;
+
     // Load cursor from ClickHouse if not overridden by env var
     if firehose_config.cursor.is_none() {
         if let Some(cursor) = load_cursor(&client).await? {
···
     );
     let consumer = FirehoseConsumer::new(firehose_config);

-    let indexer = Indexer::new(client, consumer, indexer_config).await?;
+    let indexer = FirehoseIndexer::new(client, consumer, indexer_config).await?;

-    info!("Starting indexer");
+    info!("Starting firehose indexer");
     indexer.run().await?;

     Ok(())
 }
+
+async fn run_tap_indexer(
+    client: Client,
+    tap_config: TapConfig,
+    indexer_config: IndexerConfig,
+) -> miette::Result<()> {
+    let indexer = TapIndexer::new(client, tap_config, indexer_config);
+
+    info!("Starting tap indexer");
+    indexer.run().await?;
+
+    Ok(())
+}
+
+async fn run_server_only() -> miette::Result<()> {
+    let ch_config = ClickHouseConfig::from_env()?;
+    let shard_config = ShardConfig::from_env();
+    let server_config = ServerConfig::from_env();
+
+    info!(
+        "Connecting to ClickHouse at {} (database: {})",
+        ch_config.url, ch_config.database
+    );
+    info!("SQLite shards at {}", shard_config.base_path.display());
+
+    let client = Client::new(&ch_config)?;
+
+    let state = AppState::new(client, shard_config);
+    weaver_index::server::run(state, server_config).await?;
+
+    Ok(())
+}
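
`SourceMode::from_env` selects which pipeline `Run` and `Index` drive; its definition is not shown in this diff. A minimal sketch of the shape the call sites imply (the `SOURCE_MODE` variable name is an assumption):

// Hypothetical; the real definition lives in weaver_index::config.
pub enum SourceMode {
    Firehose,
    Tap,
}

impl SourceMode {
    pub fn from_env() -> Self {
        match std::env::var("SOURCE_MODE").as_deref() {
            Ok("tap") => SourceMode::Tap,
            _ => SourceMode::Firehose, // default to the relay firehose
        }
    }
}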
···
 use dashmap::DashMap;
 use n0_future::StreamExt;
 use smol_str::{SmolStr, ToSmolStr};
-use tracing::{debug, info, warn};
+use tracing::{debug, info, trace, warn};

 use chrono::DateTime;
···
     AccountRevState, Client, FirehoseCursor, RawAccountEvent, RawIdentityEvent, RawRecordInsert,
 };
 use crate::config::IndexerConfig;
-use crate::error::{IndexError, Result};
+use crate::config::TapConfig;
+use crate::error::{ClickHouseError, IndexError, Result};
 use crate::firehose::{
     Account, Commit, ExtractedRecord, FirehoseConsumer, Identity, MessageStream,
     SubscribeReposMessage, extract_records,
 };
+use crate::tap::{TapConfig as TapConsumerConfig, TapConsumer, TapEvent};

 /// Default consumer ID for cursor tracking
 const CONSUMER_ID: &str = "main";
···
     }
 }

-/// Main indexer that consumes firehose and writes to ClickHouse
-pub struct Indexer {
+/// Firehose indexer that consumes AT Protocol firehose and writes to ClickHouse
+pub struct FirehoseIndexer {
     client: Arc<Client>,
     consumer: FirehoseConsumer,
     rev_cache: RevCache,
     config: IndexerConfig,
 }

-impl Indexer {
-    /// Create a new indexer
+impl FirehoseIndexer {
+    /// Create a new firehose indexer
     pub async fn new(
         client: Client,
         consumer: FirehoseConsumer,
···

     info!("starting indexer loop");

-    while let Some(result) = stream.next().await {
+    loop {
+        // Get time until next required flush - must commit before socket timeout (30s)
+        let records_time = records.time_left().unwrap_or(Duration::from_secs(10));
+        let identities_time = identities.time_left().unwrap_or(Duration::from_secs(10));
+        let accounts_time = accounts.time_left().unwrap_or(Duration::from_secs(10));
+        let time_left = records_time.min(identities_time).min(accounts_time);
+
+        let result = match tokio::time::timeout(time_left, stream.next()).await {
+            Ok(Some(result)) => result,
+            Ok(None) => {
+                // Stream ended
+                break;
+            }
+            Err(_) => {
+                // Timeout - flush inserters to keep INSERT alive
+                debug!("flush timeout, committing inserters");
+                records
+                    .commit()
+                    .await
+                    .map_err(|e| crate::error::ClickHouseError::Query {
+                        message: "periodic records commit failed".into(),
+                        source: e,
+                    })?;
+                identities
+                    .commit()
+                    .await
+                    .map_err(|e| crate::error::ClickHouseError::Query {
+                        message: "periodic identities commit failed".into(),
+                        source: e,
+                    })?;
+                accounts
+                    .commit()
+                    .await
+                    .map_err(|e| crate::error::ClickHouseError::Query {
+                        message: "periodic accounts commit failed".into(),
+                        source: e,
+                    })?;
+                continue;
+            }
+        };
+
         let msg = match result {
             Ok(msg) => msg,
             Err(e) => {
···
             operation: record.operation.clone(),
             seq: record.seq as u64,
             event_time: record.event_time,
+            is_live: true,
         })
         .await
         .map_err(|e| crate::error::ClickHouseError::Query {
···
 /// Minimal struct for delete lookups - just the fields we need to process the delete
 #[derive(Debug, Clone, clickhouse::Row, serde::Deserialize)]
 struct LookupRawRecord {
+    #[allow(dead_code)]
     did: SmolStr,
+    #[allow(dead_code)]
     collection: SmolStr,
+    #[allow(dead_code)]
     rkey: SmolStr,
+    #[allow(dead_code)]
     record: SmolStr, // JSON string of the original record
 }
···
         tokio::time::sleep(Duration::from_secs(1)).await;
     }
 }
+
+// ============================================================================
+// TapIndexer - consumes from tap websocket
+// ============================================================================
+
+/// Consumer ID for tap cursor tracking
+const TAP_CONSUMER_ID: &str = "tap";
+
+/// Tap indexer that consumes from tap websocket and writes to ClickHouse
+pub struct TapIndexer {
+    client: Arc<Client>,
+    tap_config: TapConfig,
+    config: IndexerConfig,
+}
+
+impl TapIndexer {
+    /// Create a new tap indexer
+    pub fn new(client: Client, tap_config: TapConfig, config: IndexerConfig) -> Self {
+        Self {
+            client: Arc::new(client),
+            tap_config,
+            config,
+        }
+    }
+
+    /// Save tap cursor to ClickHouse for visibility
+    async fn save_cursor(&self, seq: u64) -> Result<()> {
+        let query = format!(
+            "INSERT INTO firehose_cursor (consumer_id, seq, event_time) VALUES ('{}', {}, now64(3))",
+            TAP_CONSUMER_ID, seq
+        );
+
+        self.client.execute(&query).await?;
+        debug!(seq, "saved tap cursor");
+        Ok(())
+    }
+
+    /// Run the tap indexer loop
+    pub async fn run(&self) -> Result<()> {
+        info!(url = %self.tap_config.url, "connecting to tap...");
+
+        let consumer_config = TapConsumerConfig::new(self.tap_config.url.clone())
+            .with_acks(self.tap_config.send_acks);
+        let consumer = TapConsumer::new(consumer_config);
+
+        let (mut events, ack_tx) = consumer.connect().await?;
+
+        let mut records = self.client.inserter::<RawRecordInsert>("raw_records");
+        let mut identities = self
+            .client
+            .inserter::<RawIdentityEvent>("raw_identity_events");
+
+        let mut processed: u64 = 0;
+        let mut last_seq: u64 = 0;
+        let mut last_stats = Instant::now();
+        let mut last_cursor_save = Instant::now();
+
+        info!("starting tap indexer loop");
+
+        loop {
+            // Get time until next required flush - must commit before socket timeout (30s)
+            let records_time = records.time_left().unwrap_or(Duration::from_secs(10));
+            let identities_time = identities.time_left().unwrap_or(Duration::from_secs(10));
+            let time_left = records_time.min(identities_time);
+
+            let event = match tokio::time::timeout(time_left, events.recv()).await {
+                Ok(Some(event)) => event,
+                Ok(None) => {
+                    // Channel closed, exit loop
+                    break;
+                }
+                Err(_) => {
+                    // Timeout - flush inserters to keep INSERT alive
+                    trace!("flush timeout, committing inserters");
+                    records.commit().await.map_err(|e| ClickHouseError::Query {
+                        message: "periodic records commit failed".into(),
+                        source: e,
+                    })?;
+                    identities
+                        .commit()
+                        .await
+                        .map_err(|e| ClickHouseError::Query {
+                            message: "periodic identities commit failed".into(),
+                            source: e,
+                        })?;
+                    continue;
+                }
+            };
+
+            let event_id = event.id();
+            last_seq = event_id;
+
+            match event {
+                TapEvent::Record(envelope) => {
+                    let record = &envelope.record;
+
+                    // Collection filter
+                    if !self.config.collections.matches(&record.collection) {
+                        // Still ack even if filtered
+                        let _ = ack_tx.send(event_id).await;
+                        continue;
+                    }
+
+                    let json = record
+                        .record
+                        .as_ref()
+                        .map(|v| serde_json::to_string(v).unwrap_or_default())
+                        .unwrap_or_default();
+
+                    debug!(
+                        op = record.action.as_str(),
+                        id = event_id,
+                        len = json.len(),
+                        "writing record"
+                    );
+
+                    records
+                        .write(&RawRecordInsert {
+                            did: record.did.clone(),
+                            collection: record.collection.clone(),
+                            rkey: record.rkey.clone(),
+                            cid: record.cid.clone(),
+                            rev: record.rev.clone(),
+                            record: json.to_smolstr(),
+                            operation: record.action.as_str().to_smolstr(),
+                            seq: event_id,
+                            event_time: Utc::now(),
+                            is_live: record.live,
+                        })
+                        .await
+                        .map_err(|e| ClickHouseError::Query {
+                            message: "record write failed".into(),
+                            source: e,
+                        })?;
+                    records.commit().await.map_err(|e| ClickHouseError::Query {
+                        message: format!("record commit failed for id {}", event_id),
+                        source: e,
+                    })?;
+
+                    processed += 1;
+                }
+                TapEvent::Identity(envelope) => {
+                    let identity = &envelope.identity;
+
+                    identities
+                        .write(&RawIdentityEvent {
+                            did: identity.did.clone(),
+                            handle: identity.handle.clone(),
+                            seq: event_id,
+                            event_time: Utc::now(),
+                        })
+                        .await
+                        .map_err(|e| ClickHouseError::Query {
+                            message: "identity write failed".into(),
+                            source: e,
+                        })?;
+                    identities
+                        .commit()
+                        .await
+                        .map_err(|e| ClickHouseError::Query {
+                            message: "identity commit failed".into(),
+                            source: e,
+                        })?;
+                }
+            }
+
+            // Send ack after successful write+commit
+            let _ = ack_tx.send(event_id).await;
+
+            // Periodic stats
+            if last_stats.elapsed() >= Duration::from_secs(10) {
+                info!(processed, last_seq, "tap indexer stats");
+                last_stats = Instant::now();
+            }
+
+            // Save cursor every 30s for visibility
+            if last_cursor_save.elapsed() >= Duration::from_secs(30) && last_seq > 0 {
+                if let Err(e) = self.save_cursor(last_seq).await {
+                    warn!(error = ?e, "failed to save tap cursor");
+                }
+                last_cursor_save = Instant::now();
+            }
+        }
+
+        // Final flush
+        records.end().await.map_err(|e| ClickHouseError::Query {
+            message: "final records flush failed".into(),
+            source: e,
+        })?;
+        identities.end().await.map_err(|e| ClickHouseError::Query {
+            message: "final identities flush failed".into(),
+            source: e,
+        })?;
+
+        // Final cursor save
+        if last_seq > 0 {
+            self.save_cursor(last_seq).await?;
+        }
+
+        info!(last_seq, "tap stream ended");
+        Ok(())
+    }
+}
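
Both indexer loops share one keep-alive pattern: wait for the next event only as long as the earliest inserter deadline, and on timeout commit the open batches so the long-lived INSERT is not dropped by the server's idle timeout (noted above as 30s). Distilled, with `records`, `stream`, and `process` as stand-ins for the real handles:

let deadline = records.time_left().unwrap_or(Duration::from_secs(10));
match tokio::time::timeout(deadline, stream.next()).await {
    Ok(Some(msg)) => { process(msg)?; }          // normal message path
    Ok(None) => { /* stream ended */ }
    Err(_) => { records.commit().await?; }       // deadline hit: flush, keep INSERT alive
}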
+7-1
crates/weaver-index/src/lib.rs
···
 pub mod clickhouse;
 pub mod config;
+pub mod endpoints;
 pub mod error;
 pub mod firehose;
 pub mod indexer;
+pub mod server;
+pub mod sqlite;
+pub mod tap;

 pub use config::Config;
 pub use error::{IndexError, Result};
-pub use indexer::{load_cursor, Indexer};
+pub use indexer::{FirehoseIndexer, TapIndexer, load_cursor};
+pub use server::{AppState, ServerConfig};
+pub use sqlite::{ShardKey, ShardRouter, SqliteShard};
···
+-- Edit graph storage (roots and diffs)
+-- Supports DAG structure for future merge support
+
+CREATE TABLE edit_nodes (
+    -- Edit record identity (decomposed)
+    did TEXT NOT NULL,
+    collection TEXT NOT NULL, -- 'sh.weaver.edit.root' or 'sh.weaver.edit.diff'
+    rkey TEXT NOT NULL,
+
+    -- Resource being edited (decomposed)
+    resource_did TEXT NOT NULL,
+    resource_collection TEXT NOT NULL,
+    resource_rkey TEXT NOT NULL,
+
+    node_type TEXT NOT NULL, -- 'root' | 'diff'
+    created_at TEXT NOT NULL,
+    indexed_at TEXT NOT NULL,
+
+    PRIMARY KEY (did, collection, rkey)
+);
+
+CREATE INDEX idx_edit_nodes_resource ON edit_nodes(resource_did, resource_collection, resource_rkey);
+CREATE INDEX idx_edit_nodes_author ON edit_nodes(did);
+
+-- Edit graph edges (supports DAG)
+CREATE TABLE edit_edges (
+    -- Child reference (decomposed)
+    child_did TEXT NOT NULL,
+    child_collection TEXT NOT NULL,
+    child_rkey TEXT NOT NULL,
+
+    -- Parent reference (decomposed)
+    parent_did TEXT NOT NULL,
+    parent_collection TEXT NOT NULL,
+    parent_rkey TEXT NOT NULL,
+
+    edge_type TEXT NOT NULL, -- 'prev' | 'merge' (future)
+
+    PRIMARY KEY (child_did, child_collection, child_rkey, parent_did, parent_collection, parent_rkey),
+    FOREIGN KEY (child_did, child_collection, child_rkey) REFERENCES edit_nodes(did, collection, rkey),
+    FOREIGN KEY (parent_did, parent_collection, parent_rkey) REFERENCES edit_nodes(did, collection, rkey)
+);
+
+CREATE INDEX idx_edit_edges_parent ON edit_edges(parent_did, parent_collection, parent_rkey);
+
+-- Fast path: track current head per resource
+CREATE TABLE edit_heads (
+    -- Resource identity (decomposed)
+    resource_did TEXT NOT NULL,
+    resource_collection TEXT NOT NULL,
+    resource_rkey TEXT NOT NULL,
+
+    -- Latest root reference (decomposed)
+    root_did TEXT,
+    root_collection TEXT,
+    root_rkey TEXT,
+
+    -- Current head reference (decomposed)
+    head_did TEXT,
+    head_collection TEXT,
+    head_rkey TEXT,
+
+    updated_at TEXT NOT NULL,
+
+    PRIMARY KEY (resource_did, resource_collection, resource_rkey)
+);
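
Reconstructing a document means walking the `prev` edges from the head recorded in `edit_heads` back to its root. A sketch of that traversal as a SQLite recursive CTE, written here as a Rust constant to match the rest of the crate; the query text is an assumption, only the schema names come from this migration:

// Walk from a head (?1, ?2, ?3) to its root along 'prev' edges.
const WALK_TO_ROOT: &str = "
WITH RECURSIVE chain(did, collection, rkey) AS (
    SELECT ?1, ?2, ?3
    UNION ALL
    SELECT e.parent_did, e.parent_collection, e.parent_rkey
    FROM edit_edges e
    JOIN chain c ON e.child_did = c.did
               AND e.child_collection = c.collection
               AND e.child_rkey = c.rkey
    WHERE e.edge_type = 'prev'
)
SELECT did, collection, rkey FROM chain";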
···
+-- Valid collaborators (invite + accept pairs)
+CREATE TABLE collaborators (
+    -- Resource reference (decomposed)
+    resource_did TEXT NOT NULL,
+    resource_collection TEXT NOT NULL,
+    resource_rkey TEXT NOT NULL,
+
+    collaborator_did TEXT NOT NULL,
+
+    -- Invite record reference (decomposed)
+    invite_did TEXT NOT NULL,
+    invite_rkey TEXT NOT NULL,
+
+    -- Accept record reference (decomposed)
+    accept_did TEXT NOT NULL,
+    accept_rkey TEXT NOT NULL,
+
+    scope TEXT NOT NULL, -- 'direct' | 'inherited'
+    granted_at TEXT NOT NULL,
+    indexed_at TEXT NOT NULL,
+
+    PRIMARY KEY (resource_did, resource_collection, resource_rkey, collaborator_did)
+);
+
+CREATE INDEX idx_collaborators_did ON collaborators(collaborator_did);
+
+-- Active sessions (TTL-based, cleaned up on expiry)
+CREATE TABLE sessions (
+    -- Session record identity (decomposed)
+    did TEXT NOT NULL,
+    rkey TEXT NOT NULL,
+
+    -- Resource reference (decomposed)
+    resource_did TEXT NOT NULL,
+    resource_collection TEXT NOT NULL,
+    resource_rkey TEXT NOT NULL,
+
+    participant_did TEXT NOT NULL,
+    node_id TEXT NOT NULL,
+    relay_url TEXT, -- NULL if no relay
+    created_at TEXT NOT NULL,
+    expires_at TEXT, -- NULL = no expiry
+    indexed_at TEXT NOT NULL,
+
+    PRIMARY KEY (did, rkey)
+);
+
+CREATE INDEX idx_sessions_resource ON sessions(resource_did, resource_collection, resource_rkey);
+CREATE INDEX idx_sessions_expires ON sessions(expires_at);
+
+-- Pending invites (no accept yet)
+CREATE TABLE pending_invites (
+    -- Invite record identity (decomposed)
+    did TEXT NOT NULL, -- inviter DID
+    rkey TEXT NOT NULL,
+
+    -- Resource reference (decomposed)
+    resource_did TEXT NOT NULL,
+    resource_collection TEXT NOT NULL,
+    resource_rkey TEXT NOT NULL,
+
+    inviter_did TEXT NOT NULL, -- same as did
+    invitee_did TEXT NOT NULL,
+    message TEXT, -- NULL if no message
+    expires_at TEXT, -- NULL = no expiry
+    created_at TEXT NOT NULL,
+    indexed_at TEXT NOT NULL,
+
+    PRIMARY KEY (did, rkey)
+);
+
+CREATE INDEX idx_pending_invites_resource ON pending_invites(resource_did, resource_collection, resource_rkey);
+CREATE INDEX idx_pending_invites_invitee ON pending_invites(invitee_did);
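
The flow these tables imply: an invite lands in `pending_invites`, and a matching accept promotes it into `collaborators`. A hedged sketch of the two statements an indexer might run on accept; the SQL is an assumption, only the table and column names come from this migration:

// Hypothetical accept handling; parameter order is illustrative.
const CONSUME_INVITE: &str =
    "DELETE FROM pending_invites WHERE did = ?1 AND rkey = ?2";
const GRANT_COLLABORATOR: &str = "INSERT INTO collaborators
    (resource_did, resource_collection, resource_rkey, collaborator_did,
     invite_did, invite_rkey, accept_did, accept_rkey, scope, granted_at, indexed_at)
    VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, 'direct', ?9, ?10)";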
···
+-- Permissions cache
+-- Local cache of permissions for collab-related hot paths.
+-- ClickHouse is authoritative; this is populated on-demand for active resources.
+CREATE TABLE permissions (
+    -- Resource reference (decomposed)
+    resource_did TEXT NOT NULL,
+    resource_collection TEXT NOT NULL,
+    resource_rkey TEXT NOT NULL,
+
+    did TEXT NOT NULL, -- user who has permission
+
+    scope TEXT NOT NULL, -- 'owner' | 'direct' | 'inherited'
+
+    -- Source reference (decomposed) - resource itself for owner, invite for others
+    source_did TEXT NOT NULL,
+    source_collection TEXT NOT NULL,
+    source_rkey TEXT NOT NULL,
+
+    granted_at TEXT NOT NULL,
+
+    PRIMARY KEY (resource_did, resource_collection, resource_rkey, did)
+);
+
+CREATE INDEX idx_permissions_did ON permissions(did);
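
The cache makes the hot-path authorization check a single point lookup on the primary key. A sketch, with the query text assumed:

// Any returned row means `did` holds some permission; `scope` says which kind.
const CHECK_PERMISSION: &str = "SELECT scope FROM permissions
    WHERE resource_did = ?1 AND resource_collection = ?2
      AND resource_rkey = ?3 AND did = ?4";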
+5
crates/weaver-index/src/tap.rs
+mod consumer;
+mod types;
+
+pub use consumer::{TapConfig, TapConsumer};
+pub use types::*;
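
The module surface matches how `TapIndexer` drives it above. A condensed usage sketch, with `url` as a stand-in value; all other names appear in this diff:

use weaver_index::tap::{TapConfig, TapConsumer, TapEvent};

// Connect, drain events, and ack each one only after it is durably handled.
let config = TapConfig::new(url).with_acks(true);
let (mut events, ack_tx) = TapConsumer::new(config).connect().await?;
while let Some(event) = events.recv().await {
    let id = event.id();
    match event {
        TapEvent::Record(_) | TapEvent::Identity(_) => { /* persist */ }
    }
    let _ = ack_tx.send(id).await;
}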