fixed deleted record handling

Orual c11d8208 247da442

+56 -5
+1 -1
crates/weaver-index/migrations/clickhouse/001_raw_records.sql
··· 50 50 ) 51 51 ) 52 52 ENGINE = ReplacingMergeTree(indexed_at) 53 - ORDER BY (collection, did, rkey, event_time) 53 + ORDER BY (collection, did, rkey, event_time, indexed_at) 54 54 SETTINGS deduplicate_merge_projection_mode = 'drop';
+5 -3
crates/weaver-index/src/clickhouse/client.rs
··· 107 107 rkey: &str, 108 108 ) -> Result<Option<RecordRow>, IndexError> { 109 109 // FINAL ensures ReplacingMergeTree deduplication is applied 110 + // Order by event_time first (firehose data wins), then indexed_at as tiebreaker 111 + // Include deletes so we can return not-found for deleted records 110 112 let query = r#" 111 - SELECT cid, record 113 + SELECT cid, record, operation 112 114 FROM raw_records FINAL 113 115 WHERE did = ? 114 116 AND collection = ? 115 117 AND rkey = ? 116 - AND operation != 'delete' 117 - ORDER BY event_time DESC 118 + ORDER BY event_time DESC, indexed_at DESC 118 119 LIMIT 1 119 120 "#; 120 121 ··· 283 284 pub struct RecordRow { 284 285 pub cid: String, 285 286 pub record: String, // JSON string 287 + pub operation: String, 286 288 } 287 289 288 290 /// Record with rkey from raw_records (for listRecords)
+50 -1
crates/weaver-index/src/endpoints/repo.rs
··· 96 96 })?; 97 97 98 98 if let Some(row) = cached { 99 + // Check if record was deleted 100 + if row.operation == "delete" { 101 + return Err(XrpcErrorResponse::not_found("Record not found")); 102 + } 103 + 99 104 // Cache hit - return from ClickHouse 100 105 let value: Data<'_> = serde_json::from_str(&row.record).map_err(|e| { 101 106 tracing::error!("Failed to parse record JSON: {}", e); ··· 103 108 })?; 104 109 105 110 let uri_str = format!("at://{}/{}/{}", did, collection, rkey); 106 - let uri = AtUri::new_owned(uri_str).map_err(|e| { 111 + let uri = AtUri::new_owned(uri_str.clone()).map_err(|e| { 107 112 tracing::error!("Failed to construct AT URI: {}", e); 108 113 XrpcErrorResponse::internal_error("Failed to construct URI") 109 114 })?; ··· 112 117 tracing::error!("Invalid CID in database: {}", e); 113 118 XrpcErrorResponse::internal_error("Invalid CID stored") 114 119 })?; 120 + 121 + // Stale-while-revalidate: check freshness in background 122 + let cached_cid = row.cid.clone(); 123 + let clickhouse = state.clickhouse.clone(); 124 + let resolver = state.resolver.clone(); 125 + let did_str = did.as_str().to_string(); 126 + let collection_str = collection.to_string(); 127 + let rkey_str = rkey.to_string(); 128 + 129 + tokio::spawn(async move { 130 + let uri = match AtUri::new_owned(uri_str) { 131 + Ok(u) => u, 132 + Err(_) => return, 133 + }; 134 + 135 + let upstream = match resolver.fetch_record_slingshot(&uri).await { 136 + Ok(r) => r, 137 + Err(e) => { 138 + tracing::debug!("Background revalidation fetch failed: {}", e); 139 + return; 140 + } 141 + }; 142 + 143 + // Check if CID changed 144 + let upstream_cid = upstream 145 + .cid 146 + .as_ref() 147 + .map(|c| c.as_str()) 148 + .unwrap_or_default(); 149 + 150 + if upstream_cid != cached_cid && !upstream_cid.is_empty() { 151 + let record_json = serde_json::to_string(&upstream.value).unwrap_or_default(); 152 + if !record_json.is_empty() { 153 + if let Err(e) = clickhouse 154 + .insert_record(&did_str, &collection_str, &rkey_str, upstream_cid, &record_json) 155 + .await 156 + { 157 + tracing::warn!("Failed to update stale cache entry: {}", e); 158 + } else { 159 + tracing::debug!("Updated stale cache entry for {}", uri); 160 + } 161 + } 162 + } 163 + }); 115 164 116 165 return Ok(Json( 117 166 GetRecordOutput {