nonbinary.computer/weaver (atproto blogging)
more resilience
Orual, 1 month ago
e86033fb f79bbd8d
+362 -176, 4 changed files
Changed files:
  crates/weaver-index/src/clickhouse/client.rs
  crates/weaver-index/src/clickhouse/resilient_inserter.rs
  crates/weaver-index/src/indexer.rs
  docker-compose.yml
crates/weaver-index/src/clickhouse/client.rs (+2 -2)

···
             .inserter(table)
             .with_max_rows(1000)
             .with_period_bias(0.1)
-            .with_period(Some(Duration::from_secs(1)))
-            .with_max_bytes(1_048_576)
+            .with_period(Some(Duration::from_secs(2)))
+            .with_max_bytes(1_048_576 * 2)
     }

     /// Query table sizes from system.parts
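For context: these knobs belong to the clickhouse crate's batching Inserter, which flushes whenever any one threshold trips (row count, buffered bytes, or the period timer, jittered by period_bias so concurrent inserters don't flush in lockstep). A minimal sketch of the post-change configuration, reusing the same builder calls shown in the hunk above:

```rust
// Sketch only: the builder calls from the hunk above, gathered in one place.
// Any single threshold triggers a flush; with_period_bias(0.1) jitters the
// 2s timer by roughly +/-10% so parallel inserters don't flush simultaneously.
let mut records = client
    .inserter::<RawRecordInsert>("raw_records")
    .with_max_rows(1000)                         // flush at 1000 buffered rows,
    .with_max_bytes(1_048_576 * 2)               // or at 2 MiB buffered,
    .with_period(Some(Duration::from_secs(2)))   // or roughly every 2 seconds
    .with_period_bias(0.1);
```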
crates/weaver-index/src/clickhouse/resilient_inserter.rs (+236 -31)

···
 impl Default for InserterConfig {
     fn default() -> Self {
         Self {
-            max_rows: 1000,
-            max_bytes: 1_048_576, // 1MB
-            period: Some(Duration::from_secs(1)),
+            max_rows: 10000,
+            max_bytes: 1_048_576 * 2, // 2MB
+            period: Some(Duration::from_secs(2)),
             period_bias: 0.1,
         }
     }
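The new defaults trade latency for fewer, larger batches: ten times the rows, double the bytes and period. A deployment that wanted the old low-latency behaviour could override the struct field-by-field; a hypothetical sketch, assuming the fields are public:

```rust
// Hypothetical override, not in this commit: smaller, more frequent batches.
let config = InserterConfig {
    max_rows: 1_000,
    max_bytes: 1_048_576,                  // back to 1MB
    period: Some(Duration::from_secs(1)),
    ..Default::default()                   // keeps period_bias at 0.1
};
let records = ResilientRecordInserter::new(client.clone(), config);
```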
···
         self.pending.len()
     }

-    /// Handle a batch failure by retrying rows individually
+    /// Get time remaining until the next scheduled flush
+    pub fn time_left(&mut self) -> Option<std::time::Duration> {
+        self.inner.time_left()
+    }
+
+    /// Handle a batch failure by retrying rows
+    ///
+    /// Attempts to extract the failing row number from the error message.
+    /// If found, batches rows before/after the failure point for efficiency.
+    /// Falls back to individual retries if row number unavailable or sub-batches fail.
     async fn handle_batch_failure(
         &mut self,
         original_error: clickhouse::error::Error,
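The new time_left is a passthrough to the inner clickhouse inserter; the indexer uses it to bound how long it waits on the firehose, so a quiet stream can't stall a half-built INSERT past its flush period. Condensed from indexer.rs later in this commit:

```rust
// Condensed from indexer.rs below: the smallest remaining flush window
// becomes the read timeout; on timeout we commit instead of waiting forever.
let records_time = records.time_left().unwrap_or(Duration::from_secs(10));
let identities_time = identities.time_left().unwrap_or(Duration::from_secs(10));
let time_left = records_time.min(identities_time);

match tokio::time::timeout(time_left, stream.next()).await {
    Ok(Some(result)) => { /* handle the message */ }
    Ok(None) => { /* stream ended */ }
    Err(_) => {
        // Timeout - flush inserters to keep the INSERT alive
        records.commit().await?;
    }
}
```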
···
         // Create fresh inserter (old one is poisoned after error)
         self.inner = Self::create_inserter(&self.client, &self.config);

+        // Try to extract failing row number for smart retry
+        if let Some(failing_row) = extract_failing_row(&original_error) {
+            // Subtract 2 for safety margin (1-indexed to 0-indexed, plus buffer)
+            let safe_row = failing_row.saturating_sub(2);
+
+            if safe_row > 0 && safe_row < total {
+                debug!(
+                    failing_row,
+                    safe_row, total, "extracted failing row, attempting smart retry"
+                );
+                return self.smart_retry(rows, safe_row, &original_error).await;
+            }
+        }
+
+        // Fall back to individual retries
+        debug!(total, "no row number found, retrying individually");
+        self.retry_individually(rows).await
+    }
+
+    /// Smart retry: batch rows before failure, DLQ the bad row, batch rows after
+    async fn smart_retry(
+        &mut self,
+        rows: Vec<RawRecordInsert>,
+        failing_idx: usize,
+        original_error: &clickhouse::error::Error,
+    ) -> Result<Quantities, IndexError> {
+        let total = rows.len();
         let mut succeeded = 0u64;
         let mut failed = 0u64;

-        for row in rows {
-            match self.try_single_insert(&row).await {
+        // Try to batch insert rows before the failure point
+        if failing_idx > 0 {
+            let before = &rows[..failing_idx];
+            debug!(count = before.len(), "batch inserting rows before failure");
+
+            match self.batch_insert(before).await {
+                Ok(count) => {
+                    succeeded += count;
+                    debug!(count, "pre-failure batch succeeded");
+                }
+                Err(e) => {
+                    // Sub-batch failed, fall back to individual for this chunk
+                    warn!(error = ?e, "pre-failure batch failed, retrying individually");
+                    let (s, f) = self.retry_individually_slice(before).await?;
+                    succeeded += s;
+                    failed += f;
+                }
+            }
+        }
+
+        // Send the failing row (and a couple around it) to DLQ
+        let dlq_start = failing_idx;
+        let dlq_end = (failing_idx + 3).min(total); // failing row + 2 more for safety
+        for row in &rows[dlq_start..dlq_end] {
+            warn!(
+                did = %row.did,
+                collection = %row.collection,
+                rkey = %row.rkey,
+                seq = row.seq,
+                "sending suspected bad row to DLQ"
+            );
+            self.send_to_dlq(row, original_error).await?;
+            failed += 1;
+        }
+
+        // Try to batch insert rows after the failure point
+        if dlq_end < total {
+            let after = &rows[dlq_end..];
+            debug!(count = after.len(), "batch inserting rows after failure");
+
+            match self.batch_insert(after).await {
+                Ok(count) => {
+                    succeeded += count;
+                    debug!(count, "post-failure batch succeeded");
+                }
+                Err(e) => {
+                    // Sub-batch failed, fall back to individual for this chunk
+                    warn!(error = ?e, "post-failure batch failed, retrying individually");
+                    let (s, f) = self.retry_individually_slice(after).await?;
+                    succeeded += s;
+                    failed += f;
+                }
+            }
+        }
+
+        debug!(total, succeeded, failed, "smart retry complete");
+
+        Ok(Quantities {
+            rows: succeeded,
+            bytes: 0,
+            transactions: 0,
+        })
+    }
+
+    /// Batch insert a slice of rows using a fresh one-shot inserter
+    async fn batch_insert(
+        &mut self,
+        rows: &[RawRecordInsert],
+    ) -> Result<u64, clickhouse::error::Error> {
+        batch_insert_rows(&self.client, rows).await
+    }
+
+    /// Retry a vec of rows individually, returning quantities for the rows that succeeded
+    async fn retry_individually(
+        &mut self,
+        rows: Vec<RawRecordInsert>,
+    ) -> Result<Quantities, IndexError> {
+        let (succeeded, failed) = self.retry_individually_slice(&rows).await?;
+
+        if failed > 0 {
+            warn!(
+                succeeded,
+                failed, "individual retry had failures sent to DLQ"
+            );
+        }
+
+        Ok(Quantities {
+            rows: succeeded,
+            bytes: 0,
+            transactions: 0,
+        })
+    }
+
+    /// Retry a slice of rows individually, returning (succeeded, failed) counts
+    async fn retry_individually_slice(
+        &mut self,
+        rows: &[RawRecordInsert],
+    ) -> Result<(u64, u64), IndexError> {
+        let total = rows.len();
+        let mut succeeded = 0u64;
+        let mut failed = 0u64;
+
+        let client = self.client.clone();
+
+        for (i, row) in rows.iter().enumerate() {
+            debug!(i, total, did = %row.did, "retrying row individually");
+            match try_single_insert(&client, row).await {
                 Ok(()) => {
                     succeeded += 1;
+                    debug!(i, "row succeeded");
                 }
                 Err(e) => {
                     failed += 1;
···
                         error = ?e,
                         "row insert failed, sending to DLQ"
                     );
-                    self.send_to_dlq(&row, &e).await?;
+                    debug!(i, "sending to DLQ");
+                    self.send_to_dlq(row, &e).await?;
+                    debug!(i, "DLQ write complete");
                 }
             }
         }

-        debug!(total, succeeded, failed, "batch failure recovery complete");
-
-        Ok(Quantities {
-            rows: succeeded,
-            bytes: 0,
-            transactions: 0,
-        })
-    }
-
-    /// Try to insert a single row using a fresh one-shot inserter
-    async fn try_single_insert(
-        &self,
-        row: &RawRecordInsert,
-    ) -> Result<(), clickhouse::error::Error> {
-        let mut inserter: Inserter<RawRecordInsert> =
-            self.client.inserter(Tables::RAW_RECORDS).with_max_rows(1);
-
-        inserter.write(row).await?;
-        inserter.end().await?;
-        Ok(())
+        debug!(total, succeeded, failed, "individual retry complete");
+        Ok((succeeded, failed))
     }

     /// Send a failed row to the dead-letter queue
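A worked example of the retry arithmetic: for a hypothetical 1000-row batch where ClickHouse reports "(at row 791)", the split comes out as follows (values are illustrative, not from the commit):

```rust
// Hypothetical walkthrough of the index math used by smart_retry above:
let failing_row: usize = 791;                  // 1-indexed, parsed from the error
let safe_row = failing_row.saturating_sub(2);  // 789: 0-indexed plus safety buffer
let dlq_end = (safe_row + 3).min(1000);        // 792: suspected row + two neighbors
assert_eq!((safe_row, dlq_end), (789, 792));
// rows[..789]    -> re-inserted as one batch
// rows[789..792] -> sent to the dead-letter queue
// rows[792..]    -> re-inserted as one batch
```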
···
         let raw_data = serde_json::to_string(row)
             .unwrap_or_else(|e| format!("{{\"serialization_error\": \"{}\"}}", e));

+        self.write_raw_to_dlq(row.operation.clone(), raw_data, error.to_string(), row.seq)
+            .await
+    }
+
+    /// Write a pre-insert failure directly to the DLQ
+    ///
+    /// Use this for failures that happen before we even have a valid RawRecordInsert,
+    /// like JSON serialization errors.
+    pub async fn write_raw_to_dlq(
+        &mut self,
+        event_type: SmolStr,
+        raw_data: String,
+        error_message: String,
+        seq: u64,
+    ) -> Result<(), IndexError> {
         let dlq_row = RawEventDlq {
-            event_type: row.operation.clone(),
+            event_type,
             raw_data: raw_data.to_smolstr(),
-            error_message: error.to_smolstr(),
-            seq: row.seq,
+            error_message: error_message.to_smolstr(),
+            seq,
         };

         self.dlq
···
     }
 }

+/// Try to insert a single row using a fresh one-shot inserter
+///
+/// Free function to avoid &self borrow across await points (Sync issues)
+async fn try_single_insert(
+    client: &clickhouse::Client,
+    row: &RawRecordInsert,
+) -> Result<(), clickhouse::error::Error> {
+    let mut inserter: Inserter<RawRecordInsert> =
+        client.inserter(Tables::RAW_RECORDS).with_max_rows(1);
+
+    inserter.write(row).await?;
+    inserter.force_commit().await?;
+    inserter.end().await?;
+    Ok(())
+}
+
+/// Batch insert rows using a fresh inserter
+///
+/// Free function to avoid &self borrow across await points (Sync issues)
+async fn batch_insert_rows(
+    client: &clickhouse::Client,
+    rows: &[RawRecordInsert],
+) -> Result<u64, clickhouse::error::Error> {
+    let mut inserter: Inserter<RawRecordInsert> = client
+        .inserter(Tables::RAW_RECORDS)
+        .with_max_rows(rows.len() as u64);
+
+    for row in rows {
+        inserter.write(row).await?;
+    }
+    inserter.end().await?;
+    Ok(rows.len() as u64)
+}
+
+/// Extract the failing row number from a ClickHouse error message
+///
+/// Looks for patterns like "(at row 791)" in the error text.
+/// Returns 1-indexed row number if found.
+fn extract_failing_row(error: &clickhouse::error::Error) -> Option<usize> {
+    let msg = error.to_string();
+    // Look for "(at row N)"
+    if let Some(start) = msg.find("(at row ") {
+        let rest = &msg[start + 8..];
+        if let Some(end) = rest.find(')') {
+            return rest[..end].parse().ok();
+        }
+    }
+    None
+}
+
 #[cfg(test)]
 mod tests {
-    // TODO: Add tests with mock clickhouse client
+    use super::*;
+
+    #[test]
+    fn test_extract_failing_row() {
+        // Simulate the error message format from ClickHouse
+        let msg = "Code: 117. DB::Exception: Cannot parse JSON object here: : (at row 791)\n: While executing BinaryRowInputFormat.";
+
+        // We can't easily construct a clickhouse::error::Error, but we can test the parsing logic
+        assert!(msg.contains("(at row "));
+        let start = msg.find("(at row ").unwrap();
+        let rest = &msg[start + 8..];
+        let end = rest.find(')').unwrap();
+        let row: usize = rest[..end].parse().unwrap();
+        assert_eq!(row, 791);
+    }
 }
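The test duplicates the parsing logic because a clickhouse::error::Error can't easily be constructed by hand. A possible follow-up, not part of this commit, would split the scan into a &str helper the test can call directly:

```rust
// Hypothetical refactor for testability; mirrors the logic in extract_failing_row.
fn extract_failing_row_from_msg(msg: &str) -> Option<usize> {
    let start = msg.find("(at row ")?;
    let rest = &msg[start + 8..];
    let end = rest.find(')')?;
    rest[..end].parse().ok()
}

fn extract_failing_row(error: &clickhouse::error::Error) -> Option<usize> {
    extract_failing_row_from_msg(&error.to_string())
}
```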
crates/weaver-index/src/indexer.rs (+123 -142)

···
 use chrono::DateTime;

 use crate::clickhouse::{
-    AccountRevState, Client, FirehoseCursor, RawAccountEvent, RawIdentityEvent, RawRecordInsert,
+    AccountRevState, Client, FirehoseCursor, InserterConfig, RawAccountEvent, RawIdentityEvent,
+    RawRecordInsert, ResilientRecordInserter,
 };
 use crate::config::IndexerConfig;
 use crate::config::TapConfig;
 use crate::error::{ClickHouseError, IndexError, Result};
 use crate::firehose::{
-    Account, Commit, ExtractedRecord, FirehoseConsumer, Identity, MessageStream,
-    SubscribeReposMessage, extract_records,
+    Account, ExtractedRecord, FirehoseConsumer, Identity, MessageStream, SubscribeReposMessage,
+    extract_records,
 };
 use crate::tap::{TapConfig as TapConsumerConfig, TapConsumer, TapEvent};

···
         let mut stream: MessageStream = self.consumer.connect().await?;

         // Inserters handle batching internally based on config
-        let mut records = self.client.inserter::<RawRecordInsert>("raw_records");
+        // Use resilient inserter for records since that's where untrusted JSON enters
+        let mut records =
+            ResilientRecordInserter::new(self.client.inner().clone(), InserterConfig::default());
         let mut identities = self
             .client
             .inserter::<RawIdentityEvent>("raw_identity_events");
···
             let accounts_time = accounts.time_left().unwrap_or(Duration::from_secs(10));
             let time_left = records_time.min(identities_time).min(accounts_time);

-            let result =
-                match tokio::time::timeout(time_left, stream.next()).await {
-                    Ok(Some(result)) => result,
-                    Ok(None) => {
-                        // Stream ended
-                        break;
-                    }
-                    Err(_) => {
-                        // Timeout - flush inserters to keep INSERT alive
-                        debug!("flush timeout, committing inserters");
-                        records.commit().await.map_err(|e| {
-                            crate::error::ClickHouseError::Query {
-                                message: "periodic records commit failed".into(),
-                                source: e,
-                            }
+            let result = match tokio::time::timeout(time_left, stream.next()).await {
+                Ok(Some(result)) => result,
+                Ok(None) => {
+                    // Stream ended
+                    break;
+                }
+                Err(_) => {
+                    // Timeout - flush inserters to keep INSERT alive
+                    debug!("flush timeout, committing inserters");
+                    records.commit().await?;
+                    identities.commit().await.map_err(|e| {
+                        crate::error::ClickHouseError::Query {
+                            message: "periodic identities commit failed".into(),
+                            source: e,
+                        }
+                    })?;
+                    accounts
+                        .commit()
+                        .await
+                        .map_err(|e| crate::error::ClickHouseError::Query {
+                            message: "periodic accounts commit failed".into(),
+                            source: e,
                         })?;
-                    identities.commit().await.map_err(|e| {
-                        crate::error::ClickHouseError::Query {
-                            message: "periodic identities commit failed".into(),
-                            source: e,
-                        }
-                    })?;
-                    accounts.commit().await.map_err(|e| {
-                        crate::error::ClickHouseError::Query {
-                            message: "periodic accounts commit failed".into(),
-                            source: e,
-                        }
-                    })?;
-                    continue;
-                }
-            };
+                    continue;
+                }
+            };

             let msg = match result {
                 Ok(msg) => msg,
···

             match msg {
                 SubscribeReposMessage::Commit(commit) => {
-                    if self
-                        .process_commit(&commit, &mut records, &mut skipped)
-                        .await?
-                    {
-                        processed += 1;
+                    let did = commit.repo.as_ref();
+                    let rev = commit.rev.as_ref();
+
+                    // Dedup check
+                    if !self.rev_cache.should_process(did, rev) {
+                        skipped += 1;
+                        continue;
+                    }
+
+                    // Extract and write records
+                    for record in extract_records(&commit).await? {
+                        // Collection filter - skip early before JSON conversion
+                        if !self.config.collections.matches(&record.collection) {
+                            continue;
+                        }
+
+                        let json = record.to_json()?.unwrap_or_else(|| "{}".to_string());
+
+                        // Fire and forget delete handling
+                        if record.operation == "delete" {
+                            let client = self.client.clone();
+                            let record_clone = record.clone();
+                            tokio::spawn(async move {
+                                if let Err(e) = handle_delete(&client, record_clone).await {
+                                    warn!(error = ?e, "delete handling failed");
+                                }
+                            });
+                        }
+
+                        records
+                            .write(RawRecordInsert {
+                                did: record.did.clone(),
+                                collection: record.collection.clone(),
+                                rkey: record.rkey.clone(),
+                                cid: record.cid.clone(),
+                                rev: record.rev.clone(),
+                                record: json.to_smolstr(),
+                                operation: record.operation.clone(),
+                                seq: record.seq as u64,
+                                event_time: record.event_time,
+                                is_live: true,
+                            })
+                            .await?;
                     }
+
+                    // Update rev cache
+                    self.rev_cache.update(
+                        &SmolStr::new(did),
+                        &SmolStr::new(rev),
+                        &commit.commit.0.to_smolstr(),
+                    );
+
+                    processed += 1;
                 }
                 SubscribeReposMessage::Identity(identity) => {
                     write_identity(&identity, &mut identities).await?;
···
             }

             // commit() flushes if internal thresholds met, otherwise no-op
-            records
-                .commit()
-                .await
-                .map_err(|e| crate::error::ClickHouseError::Query {
-                    message: "commit failed".into(),
-                    source: e,
-                })?;
+            records.commit().await?;

             // Periodic stats and cursor save (every 10s)
             if last_stats.elapsed() >= Duration::from_secs(10) {
···
         }

         // Final flush
-        records
-            .end()
-            .await
-            .map_err(|e| crate::error::ClickHouseError::Query {
-                message: "final flush failed".into(),
-                source: e,
-            })?;
+        records.end().await?;
         identities
             .end()
             .await
···
         info!(last_seq, "firehose stream ended");
         Ok(())
     }
-
-    async fn process_commit(
-        &self,
-        commit: &Commit<'_>,
-        inserter: &mut clickhouse::inserter::Inserter<RawRecordInsert>,
-        skipped: &mut u64,
-    ) -> Result<bool> {
-        let did = commit.repo.as_ref();
-        let rev = commit.rev.as_ref();
-
-        // Dedup check
-        if !self.rev_cache.should_process(did, rev) {
-            *skipped += 1;
-            return Ok(false);
-        }
-
-        // Extract and write records
-        for record in extract_records(commit).await? {
-            // Collection filter - skip early before JSON conversion
-            if !self.config.collections.matches(&record.collection) {
-                continue;
-            }
-
-            let json = record.to_json()?.unwrap_or_else(|| "{}".to_string());
-
-            // Fire and forget delete handling
-            if record.operation == "delete" {
-                let client = self.client.clone();
-                let record_clone = record.clone();
-                tokio::spawn(async move {
-                    if let Err(e) = handle_delete(&client, record_clone).await {
-                        warn!(error = ?e, "delete handling failed");
-                    }
-                });
-            }
-
-            inserter
-                .write(&RawRecordInsert {
-                    did: record.did.clone(),
-                    collection: record.collection.clone(),
-                    rkey: record.rkey.clone(),
-                    cid: record.cid.clone(),
-                    rev: record.rev.clone(),
-                    record: json.to_smolstr(),
-                    operation: record.operation.clone(),
-                    seq: record.seq as u64,
-                    event_time: record.event_time,
-                    is_live: true,
-                })
-                .await
-                .map_err(|e| crate::error::ClickHouseError::Query {
-                    message: "write failed".into(),
-                    source: e,
-                })?;
-        }
-
-        // Update rev cache
-        self.rev_cache.update(
-            &SmolStr::new(did),
-            &SmolStr::new(rev),
-            &commit.commit.0.to_smolstr(),
-        );
-
-        Ok(true)
-    }
 }

 async fn write_identity(
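Note on the deletion above: process_commit took a concrete &mut clickhouse::inserter::Inserter<RawRecordInsert> parameter, so the commit inlines its body into the match arm rather than (presumably) reworking that signature around the new wrapper type. The resilient inserter's lifecycle in the loop, condensed from the hunks above:

```rust
// Condensed from this file's changes: the resilient inserter replaces the raw
// clickhouse Inserter wherever untrusted record JSON enters the pipeline.
let mut records =
    ResilientRecordInserter::new(client.inner().clone(), InserterConfig::default());

records.write(row).await?;   // `row: RawRecordInsert`; buffers, may flush
records.commit().await?;     // flushes only if thresholds are met, else a no-op
records.end().await?;        // final flush at shutdown
```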
···

         let (mut events, ack_tx) = consumer.connect().await?;

-        let mut records = self.client.inserter::<RawRecordInsert>("raw_records");
+        // Use resilient inserter for records since that's where untrusted JSON enters
+        let mut records =
+            ResilientRecordInserter::new(self.client.inner().clone(), InserterConfig::default());
         let mut identities = self
             .client
             .inserter::<RawIdentityEvent>("raw_identity_events");
···
                 Err(_) => {
                     // Timeout - flush inserters to keep INSERT alive
                     trace!("flush timeout, committing inserters");
-                    records.commit().await.map_err(|e| ClickHouseError::Query {
-                        message: "periodic records commit failed".into(),
-                        source: e,
-                    })?;
+                    records.commit().await?;
                     identities
                         .commit()
                         .await
···
                 continue;
             }

-            let json = record
-                .record
-                .as_ref()
-                .map(|v| serde_json::to_string(v).unwrap_or_default())
-                .unwrap_or_default();
+            let json = match &record.record {
+                Some(v) => match serde_json::to_string(v) {
+                    Ok(s) => s,
+                    Err(e) => {
+                        warn!(
+                            did = %record.did,
+                            collection = %record.collection,
+                            rkey = %record.rkey,
+                            error = ?e,
+                            "failed to serialize record, sending to DLQ"
+                        );
+                        let raw_data = format!(
+                            r#"{{"did":"{}","collection":"{}","rkey":"{}","cid":"{}","error":"serialization_failed"}}"#,
+                            record.did, record.collection, record.rkey, record.cid
+                        );
+                        records
+                            .write_raw_to_dlq(
+                                record.action.as_str().to_smolstr(),
+                                raw_data,
+                                e.to_string(),
+                                event_id,
+                            )
+                            .await?;
+                        let _ = ack_tx.send(event_id).await;
+                        continue;
+                    }
+                },
+                None => "{}".to_string(),
+            };

             debug!(
                 op = record.action.as_str(),
···
             );

             records
-                .write(&RawRecordInsert {
+                .write(RawRecordInsert {
                     did: record.did.clone(),
                     collection: record.collection.clone(),
                     rkey: record.rkey.clone(),
···
                     event_time: Utc::now(),
                     is_live: record.live,
                 })
-                .await
-                .map_err(|e| ClickHouseError::Query {
-                    message: "record write failed".into(),
-                    source: e,
-                })?;
-            records.commit().await.map_err(|e| ClickHouseError::Query {
-                message: format!("record commit failed for id {}:\n{}", event_id, json),
-                source: e,
-            })?;
+                .await?;
+            records.commit().await?;

             processed += 1;
         }
···
         }

         // Final flush
-        records.end().await.map_err(|e| ClickHouseError::Query {
-            message: "final records flush failed".into(),
-            source: e,
-        })?;
+        records.end().await?;
         identities.end().await.map_err(|e| ClickHouseError::Query {
             message: "final identities flush failed".into(),
             source: e,
docker-compose.yml (+1 -1)

···
     ports:
       - "3000:3000"
     environment:
-      RUST_LOG: debug,weaver_index=debug
+      RUST_LOG: debug,weaver_index=debug,hyper_util::client::legacy::pool=info
       # ClickHouse connection (set these for your cloud/homelab instance)
       CLICKHOUSE_URL: ${CLICKHOUSE_URL}
       CLICKHOUSE_DATABASE: ${CLICKHOUSE_DATABASE:-weaver}
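The RUST_LOG change is housekeeping: the value is a comma-separated list of target=level directives where the most specific target wins, so everything stays at debug while hyper-util's per-connection pool chatter drops to info. A sketch of the equivalent initialization, assuming the binary wires this up through tracing-subscriber's EnvFilter (an assumption; the repo's actual logging setup isn't shown in this diff):

```rust
// Sketch, assuming tracing-subscriber with the "env-filter" feature enabled.
use tracing_subscriber::EnvFilter;

tracing_subscriber::fmt()
    .with_env_filter(EnvFilter::new(
        "debug,weaver_index=debug,hyper_util::client::legacy::pool=info",
    ))
    .init();
```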