Monorepo for Aesthetic.Computer aesthetic.computer
at main 264 lines 8.2 kB view raw
#!/usr/bin/env bash
# DEPRECATED: Use ac-emacs-crash-monitor (fish) instead.
# The fish monitor has startup-lock awareness, correct config on restart,
# load-aware timeouts, and CPU monitoring. Run: ac-emacs-start-monitor
#
# Emacs Watchdog - monitors emacs daemon health and auto-recovers from hangs
# Run:  ./monitor-emacs.sh (foreground) or ./monitor-emacs.sh & (background)
# Stop: kill $(cat /tmp/emacs-watchdog.pid) or ac-watchdog-stop
#
# Commands:    status | stop | help   (no argument starts the monitor loop)
# Environment: WATCHDOG_INTERVAL, WATCHDOG_CPU, WATCHDOG_SAMPLES
#
# `set -e` is deliberately NOT enabled: most probes in this script (pgrep,
# emacsclient, ps) are expected to fail during normal operation and their
# exit codes are handled explicitly.

CHECK_INTERVAL="${WATCHDOG_INTERVAL:-10}" # Check every N seconds
CPU_THRESHOLD="${WATCHDOG_CPU:-85}"       # CPU % threshold to consider "stuck"
CPU_SAMPLES="${WATCHDOG_SAMPLES:-3}"      # Consecutive high-CPU samples before action
TIMEOUT_THRESHOLD=5                       # Seconds to wait for emacsclient response

LOG_DIR="/workspaces/aesthetic-computer/.emacs-logs"
LOG_FILE="$LOG_DIR/watchdog.log"
PID_FILE="/tmp/emacs-watchdog.pid"
WARNING_FILE="/tmp/emacs-watchdog-warning"

mkdir -p "$LOG_DIR"

# Track high CPU samples
high_cpu_count=0

# PID of the background `sleep` the main loop is currently waiting on (if any).
sleep_pid=""

#######################################
# Print a timestamped message to stdout and append it to the log file.
# Globals:   LOG_FILE (read)
# Arguments: $1 - message text
#######################################
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1" | tee -a "$LOG_FILE"
}

#######################################
# Replace non-numeric tunables with their defaults so that the integer
# comparisons and `sleep` below never see garbage from the environment.
# Globals:   CHECK_INTERVAL, CPU_THRESHOLD, CPU_SAMPLES (read/written)
# Outputs:   a warning on stderr for every value that had to be replaced
#######################################
validate_config() {
  if ! [[ "$CHECK_INTERVAL" =~ ^[0-9]+([.][0-9]+)?$ ]]; then
    printf 'watchdog: invalid WATCHDOG_INTERVAL "%s", using 10\n' "$CHECK_INTERVAL" >&2
    CHECK_INTERVAL=10
  fi
  if ! [[ "$CPU_THRESHOLD" =~ ^[0-9]+$ ]]; then
    printf 'watchdog: invalid WATCHDOG_CPU "%s", using 85\n' "$CPU_THRESHOLD" >&2
    CPU_THRESHOLD=85
  fi
  if ! [[ "$CPU_SAMPLES" =~ ^[0-9]+$ ]]; then
    printf 'watchdog: invalid WATCHDOG_SAMPLES "%s", using 3\n' "$CPU_SAMPLES" >&2
    CPU_SAMPLES=3
  fi
}

#######################################
# Escape a string for embedding inside a double-quoted JSON value.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout (no trailing newline)
#######################################
json_escape() {
  local text=$1
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  text=${text//$'\n'/\\n}
  printf '%s' "$text"
}

#######################################
# SIGKILL every process selected by `pgrep "$@"`, except this watchdog.
# A plain `pkill emacs` matches process names by regex; a script started via
# its shebang is listed under its own file name (e.g. "monitor-emacs.s"), so
# the watchdog would otherwise be able to kill itself in mid-restart.
# Arguments: pgrep options/pattern
#######################################
kill_matching() {
  local pids pid
  pids=$(pgrep "$@" 2>/dev/null)
  for pid in $pids; do
    [[ "$pid" == "$$" ]] && continue
    kill -9 "$pid" 2>/dev/null
  done
}

#######################################
# Probe the emacs daemon and report its state.
# Globals:   TIMEOUT_THRESHOLD, CPU_THRESHOLD (read)
# Outputs:   exactly one line on stdout, one of:
#              not_running
#              unresponsive:<pid>
#              high_cpu:<cpu>:<pid>
#              healthy:<pid>
#######################################
check_emacs_health() {
  local daemon_pid
  daemon_pid=$(pgrep -f "emacs.*daemon" 2>/dev/null | head -1)

  if [ -z "$daemon_pid" ]; then
    echo "not_running"
    return
  fi

  # Check if responsive (quick eval)
  if ! timeout "$TIMEOUT_THRESHOLD" emacsclient -e "t" >/dev/null 2>&1; then
    echo "unresponsive:$daemon_pid"
    return
  fi

  # Check CPU usage. Only the integer part is kept; both "." and "," are
  # accepted as decimal separators so a non-C locale cannot break the test.
  # NOTE: procps documents %cpu as CPU time divided by process lifetime, so
  # this is a lifetime average rather than an instantaneous reading.
  local cpu
  cpu=$(ps -p "$daemon_pid" -o %cpu= 2>/dev/null | tr -d ' ')
  cpu=${cpu%%[.,]*}

  if [[ "$cpu" =~ ^[0-9]+$ ]] && [ "$cpu" -gt "$CPU_THRESHOLD" ]; then
    echo "high_cpu:$cpu:$daemon_pid"
    return
  fi

  echo "healthy:$daemon_pid"
}

#######################################
# Write the warning file, fire the optional desktop notification and ring
# the terminal bell after an automatic restart.
# Globals:   WARNING_FILE (read)
# Arguments: $1 - human-readable restart reason
#######################################
show_warning() {
  local reason="$1"
  local timestamp safe_reason
  timestamp=$(date '+%Y-%m-%d %H:%M:%S')
  safe_reason=$(json_escape "$reason")

  # Write warning file for artery TUI to detect
  cat > "$WARNING_FILE" << EOF
{
  "timestamp": "$timestamp",
  "reason": "$safe_reason",
  "message": "Emacs daemon was auto-restarted. Restart the '💻 Aesthetic' task to reconnect.",
  "acknowledged": false
}
EOF

  # Try ac-notify if available
  if [ -x "/workspaces/aesthetic-computer/ac-notify" ]; then
    /workspaces/aesthetic-computer/ac-notify "⚠️ Emacs Recovered" "Watchdog: $reason. Restart the Aesthetic task." 2>/dev/null &
  fi

  # Ring terminal bell
  printf '\a' 2>/dev/null

  log "⚠️ WARNING: $reason - user notification sent"
}

#######################################
# Force-kill all emacs processes, start a fresh daemon and verify it answers.
# SIGKILL is used straight away because this only runs when the daemon is
# already considered hung.
# Arguments: $1 - human-readable restart reason (logged and shown to user)
# Returns:   0 if the new daemon responds to emacsclient, 1 otherwise
#######################################
restart_emacs() {
  local reason="$1"
  log "🔄 WATCHDOG: Restarting emacs daemon (reason: $reason)"

  # Kill all emacs processes (never this watchdog, see kill_matching)
  kill_matching -f "emacs.*daemon"
  kill_matching emacs
  kill_matching emacsclient
  sleep 2

  # Start fresh daemon
  log "🚀 WATCHDOG: Starting fresh emacs daemon..."
  emacs --daemon 2>&1 | head -10 >> "$LOG_FILE"

  sleep 2

  # Verify
  if timeout 5 emacsclient -e "t" >/dev/null 2>&1; then
    log "✅ WATCHDOG: Emacs daemon restarted successfully"
    # Trigger aesthetic-backend so tabs/terminals come back after crash
    if timeout 15 emacsclient -e "(aesthetic-backend \"artery\")" >/dev/null 2>&1; then
      log "🧭 WATCHDOG: aesthetic-backend triggered after restart"
    else
      log "⚠️ WATCHDOG: Failed to trigger aesthetic-backend after restart"
    fi
    show_warning "$reason"
    return 0
  else
    log "❌ WATCHDOG: Emacs daemon failed to restart"
    return 1
  fi
}

#######################################
# Print the watchdog configuration plus the current emacs and watchdog state.
# Globals:   PID_FILE, LOG_FILE, CHECK_INTERVAL, CPU_THRESHOLD, CPU_SAMPLES
#######################################
show_status() {
  local status
  status=$(check_emacs_health)

  echo "=== Emacs Watchdog Status ==="
  echo "PID File: $PID_FILE"
  echo "Log File: $LOG_FILE"
  echo "Check Interval: ${CHECK_INTERVAL}s"
  echo "CPU Threshold: ${CPU_THRESHOLD}%"
  echo "CPU Samples: ${CPU_SAMPLES}"
  echo ""

  case "$status" in
    not_running)
      echo "Emacs: 🔴 Not running"
      ;;
    unresponsive:*)
      echo "Emacs: ⚠️ Unresponsive (PID: ${status#unresponsive:})"
      ;;
    high_cpu:*:*)
      local cpu="${status#high_cpu:}"
      cpu="${cpu%%:*}"
      local pid="${status##*:}"
      echo "Emacs: ⚠️ High CPU ${cpu}% (PID: $pid)"
      ;;
    healthy:*)
      echo "Emacs: ✅ Healthy (PID: ${status#healthy:})"
      ;;
  esac

  if [ -f "$PID_FILE" ]; then
    local wpid
    wpid=$(cat "$PID_FILE")
    if ps -p "$wpid" >/dev/null 2>&1; then
      echo "Watchdog: 🟢 Running (PID: $wpid)"
    else
      echo "Watchdog: 🔴 Dead (stale PID file)"
    fi
  else
    echo "Watchdog: 🔴 Not running"
  fi
}

#######################################
# Signal handler for the monitor loop: log, reap the pending sleep, release
# the PID file (only if it is still ours) and exit.
# Globals:   PID_FILE, sleep_pid (read)
#######################################
cleanup() {
  log "🛑 WATCHDOG: Shutting down (signal received)"
  if [ -n "$sleep_pid" ]; then
    kill "$sleep_pid" 2>/dev/null
  fi
  if [ -f "$PID_FILE" ] && [ "$(cat "$PID_FILE" 2>/dev/null)" = "$$" ]; then
    rm -f "$PID_FILE"
  fi
  exit 0
}

#######################################
# Claim the PID file and poll emacs forever, restarting it when it stops
# answering or stays above the CPU threshold for CPU_SAMPLES checks in a row.
# Globals:   PID_FILE, CHECK_INTERVAL, CPU_THRESHOLD, CPU_SAMPLES,
#            TIMEOUT_THRESHOLD (read); high_cpu_count, sleep_pid (written)
#######################################
main_loop() {
  local status pid cpu

  # Write PID file
  echo "$$" > "$PID_FILE"

  # Handle signals gracefully. Installed here rather than at file scope so
  # that interrupting `status`/`stop`/`help` can never remove the PID file of
  # a watchdog that is actually running.
  trap cleanup SIGTERM SIGINT SIGHUP

  log "🐕 WATCHDOG: Starting emacs health monitor (PID: $$)"
  log " Check interval: ${CHECK_INTERVAL}s, CPU threshold: ${CPU_THRESHOLD}%, Samples needed: ${CPU_SAMPLES}"

  while true; do
    status=$(check_emacs_health)

    case "$status" in
      "not_running")
        # Daemon not running - probably intentional, don't log spam
        high_cpu_count=0
        ;;
      unresponsive:*)
        pid="${status#unresponsive:}"
        log "⚠️ WATCHDOG: Emacs daemon (PID: $pid) UNRESPONSIVE - restarting"
        high_cpu_count=0
        restart_emacs "unresponsive (timeout after ${TIMEOUT_THRESHOLD}s)"
        ;;
      high_cpu:*:*)
        cpu="${status#high_cpu:}"
        cpu="${cpu%%:*}"
        pid="${status##*:}"
        high_cpu_count=$((high_cpu_count + 1))
        log "⚠️ WATCHDOG: High CPU detected (${cpu}%) on PID $pid - sample $high_cpu_count/$CPU_SAMPLES"

        if [ "$high_cpu_count" -ge "$CPU_SAMPLES" ]; then
          log "🔥 WATCHDOG: Sustained high CPU for ${CPU_SAMPLES} checks - restarting"
          restart_emacs "sustained high CPU (${cpu}% for ${CPU_SAMPLES} samples)"
          high_cpu_count=0
        fi
        ;;
      healthy:*)
        if [ "$high_cpu_count" -gt 0 ]; then
          log "✅ WATCHDOG: CPU normalized after $high_cpu_count samples"
        fi
        high_cpu_count=0
        ;;
    esac

    # Sleep in the background and `wait` on it: bash runs a trap immediately
    # when it interrupts `wait`, whereas a foreground `sleep` would delay
    # shutdown until the full interval has elapsed.
    sleep "$CHECK_INTERVAL" &
    sleep_pid=$!
    wait "$sleep_pid"
    sleep_pid=""
  done
}

validate_config

# Parse arguments
case "${1:-}" in
  status|--status|-s)
    show_status
    exit 0
    ;;
  stop|--stop)
    if [ -f "$PID_FILE" ]; then
      wpid=$(cat "$PID_FILE")
      if kill -0 "$wpid" 2>/dev/null; then
        kill "$wpid"
        echo "Watchdog (PID: $wpid) stopped"
        rm -f "$PID_FILE"
      else
        echo "Watchdog not running (stale PID file)"
        rm -f "$PID_FILE"
      fi
    else
      echo "Watchdog not running (no PID file)"
    fi
    exit 0
    ;;
  help|--help|-h)
    echo "Usage: $0 [command]"
    echo ""
    echo "Commands:"
    echo " (none) Start watchdog in foreground"
    echo " status Show emacs and watchdog status"
    echo " stop Stop running watchdog"
    echo " help Show this help"
    echo ""
    echo "Environment variables:"
    echo " WATCHDOG_INTERVAL Check interval in seconds (default: 10)"
    echo " WATCHDOG_CPU CPU threshold percentage (default: 85)"
    echo " WATCHDOG_SAMPLES High-CPU samples before restart (default: 3)"
    exit 0
    ;;
esac

# Check if already running
if [ -f "$PID_FILE" ]; then
  existing_pid=$(cat "$PID_FILE")
  if ps -p "$existing_pid" >/dev/null 2>&1; then
    echo "Watchdog already running (PID: $existing_pid)"
    echo "Use '$0 stop' to stop it first, or '$0 status' to check status"
    exit 1
  else
    rm -f "$PID_FILE"
  fi
fi

main_loop