An all-to-all group chat for AI agents on ATProto.

Fix race condition causing duplicate message processing

- Add processing_queue flag to prevent concurrent queue processing
- Check processing flag before attempting to process queue
- Cancel and clear flush task when processing begins
- Add proper cleanup in finally block to always reset flag
- Improve shutdown handling to cancel pending tasks
- Process remaining messages on shutdown before exit

Fixes an issue where multiple timeout flushes could trigger simultaneously,
causing the same batch of messages to be sent to the agent multiple times.

šŸ¤– Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
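For illustration, here is a minimal, self-contained sketch of the guard pattern this commit applies. It is not the bridge's code: `QueueGuardDemo` and its fields are stand-ins for the real attributes shown in the diff below. Two simulated timeout flushes race, but only the one that takes the lock first and finds the `processing_queue` flag clear gets to claim the batch, so it is sent exactly once.

```python
import asyncio

class QueueGuardDemo:
    """Stand-in for the bridge's queue handling; field names mirror the diff below."""

    def __init__(self) -> None:
        self.message_queue = ["blip-1", "blip-2"]
        self.queue_lock = asyncio.Lock()
        self.processing_queue = False
        self.batches_sent = 0

    async def process_message_queue(self) -> None:
        # Claim the batch under the lock: check the flag, copy, clear, set the flag.
        async with self.queue_lock:
            if self.processing_queue or not self.message_queue:
                return  # another flush already claimed the batch (or nothing to send)
            self.processing_queue = True
            items = self.message_queue.copy()
            self.message_queue.clear()
        try:
            await asyncio.sleep(0.01)  # stand-in for the (slow) agent call
            self.batches_sent += 1
            print(f"sent one batch of {len(items)} message(s)")
        finally:
            async with self.queue_lock:
                self.processing_queue = False  # always reset, even if the send fails

async def main() -> None:
    demo = QueueGuardDemo()
    # Two "timeout flushes" racing against each other.
    await asyncio.gather(demo.process_message_queue(), demo.process_message_queue())
    print(f"batches sent: {demo.batches_sent}")  # 1, not 2

asyncio.run(main())
```

The key detail is that the flag check, the copy, the clear, and the flag set all happen inside a single lock acquisition, while the slow agent call happens outside the lock; the `finally` block mirrors the diff's cleanup so an error during the send cannot leave the flag stuck.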

Changed files (+47 -12)

src/jetstream_letta_bridge.py  (+47 -12)
···
         # Message processing
         self.message_queue: List[Dict[str, Any]] = []
         self.queue_lock = asyncio.Lock()
+        self.processing_queue = False  # Flag to prevent concurrent processing
         self.batch_size = config.get('agent', {}).get('batch_size', 1)
         self.queue_flush_timeout = config.get('agent', {}).get('queue_flush_timeout', 30)  # seconds
         self.queue_first_message_time: Optional[float] = None
···
         try:
             await asyncio.sleep(self.queue_flush_timeout)

-            # Check if we still have messages to process
+            # Check if we still have messages to process and not already processing
             async with self.queue_lock:
-                if self.message_queue:
+                if self.message_queue and not self.processing_queue:
                     console.print(f"[{self.agent_name}] ā° Timeout flush: {len(self.message_queue)} messages")
+                    # Process outside the lock
+                else:
+                    logger.debug(f"[{self.agent_name}] Flush skipped - queue empty or already processing")
+                    return

             await self.process_message_queue()

         except asyncio.CancelledError:
             # Task was cancelled, which is fine
+            logger.debug(f"[{self.agent_name}] Flush task cancelled")
             pass

     async def process_message_queue(self) -> None:
         """Process all queued messages by sending them to the agent."""
-        if not self.message_queue:
-            return
+        # Acquire lock and check if we should process
+        async with self.queue_lock:
+            # Check if already processing or queue is empty
+            if self.processing_queue:
+                logger.debug(f"[{self.agent_name}] Already processing queue, skipping")
+                return

-        # Get all queued items
-        async with self.queue_lock:
+            if not self.message_queue:
+                logger.debug(f"[{self.agent_name}] Queue is empty, nothing to process")
+                return
+
+            # Set processing flag and get items
+            self.processing_queue = True
             items_to_process = self.message_queue.copy()
             self.message_queue.clear()
             # Reset queue timing
             self.queue_first_message_time = None
+            # Cancel flush task since we're processing now
             if self.flush_task and not self.flush_task.done():
                 self.flush_task.cancel()
-
-        if not items_to_process:
-            return
+                self.flush_task = None

         # Create combined prompt for batch processing
         if len(items_to_process) == 1:
···
             console.print(f"[{self.agent_name}] āŒ {error_name}: {str(e)[:100]}")
             logger.error(f"[{self.agent_name}] Agent communication error: {e}")
             # Continue processing - don't let one error stop the bridge
+        finally:
+            # Always clear the processing flag when done
+            async with self.queue_lock:
+                self.processing_queue = False
+            logger.debug(f"[{self.agent_name}] Queue processing complete, flag cleared")

     def agent_stream_handler(self, chunk) -> None:
         """Handle streaming chunks from the agent."""
···

     async def stop(self) -> None:
         """Stop the bridge."""
+        logger.debug(f"[{self.agent_name}] Stopping bridge...")
         self.running = False
+
+        # Cancel any pending flush task
+        if self.flush_task and not self.flush_task.done():
+            logger.debug(f"[{self.agent_name}] Cancelling flush task")
+            self.flush_task.cancel()
+            try:
+                await self.flush_task
+            except asyncio.CancelledError:
+                pass
+
+        # Process any remaining messages in queue
+        if self.message_queue:
+            logger.debug(f"[{self.agent_name}] Processing {len(self.message_queue)} remaining messages")
+            await self.process_message_queue()
+
         await self.disconnect_jetstream()
         await self.did_cache.close()
-
+
         # Final statistics
         elapsed = time.time() - self.start_time
-        console.print(f"\nšŸ“Š Final Stats:")
+        console.print(f"\n[{self.agent_name}] šŸ“Š Final Stats:")
         console.print(f" Received: {self.blips_received}")
         console.print(f" Sent: {self.messages_sent_to_agent}")
         console.print(f" Published: {self.blips_published}")
         console.print(f" Runtime: {elapsed/60:.1f}m")
-        console.print(f" Rate: {self.blips_received / (elapsed / 60):.1f}/min")
+        if elapsed > 0:
+            console.print(f" Rate: {self.blips_received / (elapsed / 60):.1f}/min")


 def list_available_agents(directory: str) -> None:
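And a similarly hedged sketch of the shutdown ordering added to `stop()`: cancel the pending flush task first, then drain whatever is still queued exactly once. The helper names below (`flush_after_timeout`, `shutdown`) are illustrative scaffolding, not code from this repository.

```python
import asyncio

async def flush_after_timeout(queue: list, timeout: float) -> None:
    """Stand-in for the bridge's timeout flush task."""
    try:
        await asyncio.sleep(timeout)
        print(f"timeout flush would send: {queue}")
        queue.clear()
    except asyncio.CancelledError:
        pass  # cancelled by shutdown before the timeout fired

async def shutdown(queue: list, flush_task: asyncio.Task) -> None:
    # 1. Cancel the pending flush so it cannot race with the final drain.
    if flush_task and not flush_task.done():
        flush_task.cancel()
        try:
            await flush_task
        except asyncio.CancelledError:
            pass
    # 2. Drain whatever is still queued, exactly once, before exiting.
    if queue:
        print(f"draining {len(queue)} remaining message(s) on shutdown")
        queue.clear()

async def main() -> None:
    queue = ["blip received moments before shutdown"]
    flush_task = asyncio.create_task(flush_after_timeout(queue, timeout=30.0))
    await asyncio.sleep(0)  # let the flush task start waiting
    await shutdown(queue, flush_task)

asyncio.run(main())
```

Cancelling the flush before draining matters: otherwise a flush that fires mid-shutdown could claim the same batch the shutdown path is about to send.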