Monorepo for Aesthetic.Computer
aesthetic.computer
1#!/usr/bin/env bash
2# DEPRECATED: Use ac-emacs-crash-monitor (fish) instead.
3# The fish monitor has startup-lock awareness, correct config on restart,
4# load-aware timeouts, and CPU monitoring. Run: ac-emacs-start-monitor
5#
6# Emacs Watchdog - monitors emacs daemon health and auto-recovers from hangs
7# Run: ./monitor-emacs.sh (foreground) or ./monitor-emacs.sh & (background)
8# Stop: kill $(cat /tmp/emacs-watchdog.pid) or ac-watchdog-stop
9
#######################################
# Validate one tunable taken from the environment.
# Arguments: $1 - candidate value
#            $2 - fallback used when the candidate is rejected
#            $3 - extended regex the candidate must match in full
# Outputs:   the accepted value (or the fallback) on stdout; a warning on
#            stderr when the candidate is rejected
#######################################
_watchdog_setting() {
  local value="$1" fallback="$2" pattern="$3"
  if [[ "$value" =~ $pattern ]]; then
    printf '%s\n' "$value"
    return 0
  fi
  printf 'watchdog: ignoring invalid setting "%s" (using %s)\n' "$value" "$fallback" >&2
  printf '%s\n' "$fallback"
}

# The values below feed `sleep` and integer `-gt`/`-ge` tests. A malformed
# value would make `sleep` fail instantly (busy loop) or make the CPU
# comparison error out (check silently disabled), so reject it up front.
CHECK_INTERVAL=$(_watchdog_setting "${WATCHDOG_INTERVAL:-10}" 10 '^[0-9]+([.][0-9]+)?[smhd]?$') # Check every N seconds
CPU_THRESHOLD=$(_watchdog_setting "${WATCHDOG_CPU:-85}" 85 '^[0-9]+$')                          # CPU % threshold to consider "stuck"
CPU_SAMPLES=$(_watchdog_setting "${WATCHDOG_SAMPLES:-3}" 3 '^[0-9]+$')                          # Consecutive high-CPU samples before action
TIMEOUT_THRESHOLD=5                                                                            # Seconds to wait for emacsclient response

LOG_DIR="/workspaces/aesthetic-computer/.emacs-logs"
LOG_FILE="$LOG_DIR/watchdog.log"
PID_FILE="/tmp/emacs-watchdog.pid"
WARNING_FILE="/tmp/emacs-watchdog-warning"

mkdir -p "$LOG_DIR"
21
#######################################
# Emit one timestamped log line.
# Globals:   LOG_FILE (appended to)
# Arguments: $@ - message; multiple words are joined with single spaces
# Outputs:   "[YYYY-MM-DD HH:MM:SS] message" on stdout and in $LOG_FILE
#######################################
log() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  # printf keeps the message verbatim (no option or escape interpretation),
  # and "$*" keeps any extra arguments instead of silently dropping them.
  printf '[%s] %s\n' "$stamp" "$*" | tee -a "$LOG_FILE"
}
25
# Running count of consecutive health checks that reported high CPU.
# main_loop increments it on each high-CPU result and resets it to zero on any
# other result or after a restart.
high_cpu_count=0
28
#######################################
# Probe the Emacs daemon and report its state as a single token.
# Globals:   TIMEOUT_THRESHOLD, CPU_THRESHOLD (read)
# Outputs:   exactly one of, on stdout:
#              not_running
#              unresponsive:<pid>
#              high_cpu:<cpu>:<pid>
#              healthy:<pid>
#######################################
check_emacs_health() {
  local daemon_pid cpu_raw cpu

  daemon_pid=$(pgrep -f "emacs.*daemon" 2>/dev/null | head -1)
  if [[ -z "$daemon_pid" ]]; then
    echo "not_running"
    return
  fi

  # Check if responsive (quick eval)
  if ! timeout "$TIMEOUT_THRESHOLD" emacsclient -e "t" >/dev/null 2>&1; then
    echo "unresponsive:$daemon_pid"
    return
  fi

  # Check CPU usage. Force the C locale and cut at either decimal separator so
  # the integer part is extracted even where ps would print "12,3".
  # NOTE(review): procps documents %cpu as CPU time divided by the time the
  # process has been running, i.e. a lifetime average rather than a current
  # reading - confirm that is the signal wanted here.
  cpu_raw=$(LC_ALL=C ps -p "$daemon_pid" -o %cpu= 2>/dev/null)
  cpu_raw=${cpu_raw//[[:space:]]/}
  cpu=${cpu_raw%%[.,]*}

  # Only compare when both sides are plain integers. A malformed value used to
  # make `[ -gt ]` error out and fall through to "healthy" with stderr noise.
  # 10# keeps values such as "08" from being read as octal.
  if [[ "$cpu" =~ ^[0-9]+$ && "$CPU_THRESHOLD" =~ ^[0-9]+$ ]] \
    && ((10#$cpu > 10#$CPU_THRESHOLD)); then
    echo "high_cpu:$cpu:$daemon_pid"
    return
  fi

  echo "healthy:$daemon_pid"
}
55
#######################################
# Stop every Emacs process, start a fresh daemon and verify it answers.
# Globals:   LOG_FILE (appended to)
# Arguments: $1 - human-readable reason, used in log lines and the warning
# Returns:   0 if the new daemon answers emacsclient, 1 otherwise
#######################################
restart_emacs() {
  local reason="$1"
  local startup_out

  log "🔄 WATCHDOG: Restarting emacs daemon (reason: $reason)"

  # Kill all emacs processes. SIGTERM goes first so an Emacs that is still able
  # to react gets a chance to shut down on its own terms.
  pkill -TERM -f "emacs.*daemon" 2>/dev/null
  pkill -TERM emacs 2>/dev/null
  pkill -TERM emacsclient 2>/dev/null
  sleep 2

  # Whatever ignored SIGTERM (e.g. a wedged daemon) is force-killed.
  pkill -KILL -f "emacs.*daemon" 2>/dev/null
  pkill -KILL emacs 2>/dev/null
  pkill -KILL emacsclient 2>/dev/null
  sleep 1

  # Start fresh daemon
  log "🚀 WATCHDOG: Starting fresh emacs daemon..."
  # Capture startup output in a file and copy its first 10 lines to the log.
  # Piping straight into `head` closes the pipe after 10 lines while the
  # daemon may still be writing to it.
  if startup_out=$(mktemp 2>/dev/null); then
    emacs --daemon >"$startup_out" 2>&1
    head -n 10 "$startup_out" >>"$LOG_FILE"
    rm -f "$startup_out"
  else
    # No temp file available: start the daemon anyway, without the excerpt.
    emacs --daemon >/dev/null 2>&1
  fi

  sleep 2

  # Verify
  if ! timeout 5 emacsclient -e "t" >/dev/null 2>&1; then
    log "❌ WATCHDOG: Emacs daemon failed to restart"
    return 1
  fi

  log "✅ WATCHDOG: Emacs daemon restarted successfully"
  # Trigger aesthetic-backend so tabs/terminals come back after crash
  if timeout 15 emacsclient -e "(aesthetic-backend \"artery\")" >/dev/null 2>&1; then
    log "🧭 WATCHDOG: aesthetic-backend triggered after restart"
  else
    log "⚠️ WATCHDOG: Failed to trigger aesthetic-backend after restart"
  fi
  show_warning "$reason"
  return 0
}
88
#######################################
# Record that the daemon was auto-restarted and alert the user.
# Globals:   WARNING_FILE (overwritten with a small JSON document)
# Arguments: $1 - reason for the restart
# Outputs:   a terminal bell on stdout, plus a log line via log()
#######################################
show_warning() {
  local reason="$1"
  local timestamp json_reason

  timestamp=$(date '+%Y-%m-%d %H:%M:%S')

  # Escape the reason for use inside a JSON string so that a quote, backslash
  # or control character in it cannot produce an unparseable warning file.
  # Backslashes must be doubled first, before the other escapes add new ones.
  json_reason=${reason//\\/\\\\}
  json_reason=${json_reason//\"/\\\"}
  json_reason=${json_reason//$'\n'/\\n}
  json_reason=${json_reason//$'\r'/\\r}
  json_reason=${json_reason//$'\t'/\\t}

  # Write warning file for artery TUI to detect
  cat > "$WARNING_FILE" << EOF
{
 "timestamp": "$timestamp",
 "reason": "$json_reason",
 "message": "Emacs daemon was auto-restarted. Restart the '💻 Aesthetic' task to reconnect.",
 "acknowledged": false
}
EOF

  # Try ac-notify if available
  if [[ -x "/workspaces/aesthetic-computer/ac-notify" ]]; then
    /workspaces/aesthetic-computer/ac-notify "⚠️ Emacs Recovered" "Watchdog: $reason. Restart the Aesthetic task." 2>/dev/null &
  fi

  # Ring terminal bell
  printf '\a' 2>/dev/null

  log "⚠️ WARNING: $reason - user notification sent"
}
114
#######################################
# Print the watchdog settings, then the state of the Emacs daemon and of the
# watchdog process recorded in the PID file.
# Globals:   PID_FILE, LOG_FILE, CHECK_INTERVAL, CPU_THRESHOLD, CPU_SAMPLES (read)
# Outputs:   human-readable report on stdout
#######################################
show_status() {
  local health
  health=$(check_emacs_health)

  printf '%s\n' \
    "=== Emacs Watchdog Status ===" \
    "PID File: $PID_FILE" \
    "Log File: $LOG_FILE" \
    "Check Interval: ${CHECK_INTERVAL}s" \
    "CPU Threshold: ${CPU_THRESHOLD}%" \
    "CPU Samples: ${CPU_SAMPLES}" \
    ""

  # Translate the health token into one report line; an unrecognised token
  # prints nothing.
  if [[ "$health" == "not_running" ]]; then
    echo "Emacs: 🔴 Not running"
  elif [[ "$health" == unresponsive:* ]]; then
    echo "Emacs: ⚠️ Unresponsive (PID: ${health#unresponsive:})"
  elif [[ "$health" == high_cpu:*:* ]]; then
    # Token layout is high_cpu:<cpu>:<pid>.
    local load="${health#high_cpu:}"
    load="${load%%:*}"
    local daemon="${health##*:}"
    echo "Emacs: ⚠️ High CPU ${load}% (PID: $daemon)"
  elif [[ "$health" == healthy:* ]]; then
    echo "Emacs: ✅ Healthy (PID: ${health#healthy:})"
  fi

  if [[ ! -f "$PID_FILE" ]]; then
    echo "Watchdog: 🔴 Not running"
    return
  fi

  local recorded_pid
  recorded_pid=$(cat "$PID_FILE")
  if ps -p "$recorded_pid" >/dev/null 2>&1; then
    echo "Watchdog: 🟢 Running (PID: $recorded_pid)"
  else
    echo "Watchdog: 🔴 Dead (stale PID file)"
  fi
}
157
#######################################
# Watchdog main loop: record our PID, then poll the daemon forever and
# restart it when it is unresponsive or stays above the CPU threshold.
# Globals:   PID_FILE (written), CHECK_INTERVAL, CPU_THRESHOLD, CPU_SAMPLES,
#            TIMEOUT_THRESHOLD (read), high_cpu_count (read and written)
# Returns:   never returns on its own; the process ends via a signal/exit
#######################################
main_loop() {
  # Declared once here so none of the loop's scratch values leak into the
  # global scope (`status` used to).
  local status pid cpu

  # Write PID file
  echo $$ > "$PID_FILE"

  log "🐕 WATCHDOG: Starting emacs health monitor (PID: $$)"
  log " Check interval: ${CHECK_INTERVAL}s, CPU threshold: ${CPU_THRESHOLD}%, Samples needed: ${CPU_SAMPLES}"

  while true; do
    status=$(check_emacs_health)

    case "$status" in
      "not_running")
        # Daemon not running - probably intentional, don't log spam
        high_cpu_count=0
        ;;
      unresponsive:*)
        pid="${status#unresponsive:}"
        log "⚠️ WATCHDOG: Emacs daemon (PID: $pid) UNRESPONSIVE - restarting"
        high_cpu_count=0
        restart_emacs "unresponsive (timeout after ${TIMEOUT_THRESHOLD}s)"
        ;;
      high_cpu:*:*)
        # Token layout is high_cpu:<cpu>:<pid>.
        cpu="${status#high_cpu:}"
        cpu="${cpu%%:*}"
        pid="${status##*:}"
        high_cpu_count=$((high_cpu_count + 1))
        log "⚠️ WATCHDOG: High CPU detected (${cpu}%) on PID $pid - sample $high_cpu_count/$CPU_SAMPLES"

        if [ "$high_cpu_count" -ge "$CPU_SAMPLES" ]; then
          log "🔥 WATCHDOG: Sustained high CPU for ${CPU_SAMPLES} checks - restarting"
          restart_emacs "sustained high CPU (${cpu}% for ${CPU_SAMPLES} samples)"
          high_cpu_count=0
        fi
        ;;
      healthy:*)
        if [ "$high_cpu_count" -gt 0 ]; then
          log "✅ WATCHDOG: CPU normalized after $high_cpu_count samples"
        fi
        high_cpu_count=0
        ;;
    esac

    # If sleep rejects the interval (e.g. a malformed WATCHDOG_INTERVAL) it
    # returns immediately; fall back to the default 10s so the loop cannot
    # degrade into a busy loop of health probes.
    sleep "$CHECK_INTERVAL" || sleep 10
  done
}
203
#######################################
# Signal handler: note the shutdown in the log, drop the PID file, and end
# the process with a success status.
# Globals:   PID_FILE (removed)
#######################################
cleanup() {
  log "🛑 WATCHDOG: Shutting down (signal received)"

  # Remove our PID marker so later status/start checks see no watchdog.
  rm -f "$PID_FILE"

  exit 0
}
209
#######################################
# Script entry point: handle the status/stop/help sub-commands, refuse to
# start a second watchdog, then run the monitor loop.
# Globals:   PID_FILE (read; removed when stale or on stop)
# Arguments: $1 - optional command (status | stop | help and their aliases)
# Returns:   exits 0 for the sub-commands, 1 if a watchdog is already running
#######################################
main() {
  local wpid existing_pid

  # Parse arguments
  case "${1:-}" in
    status | --status | -s)
      show_status
      exit 0
      ;;
    stop | --stop)
      if [[ -f "$PID_FILE" ]]; then
        wpid=$(cat "$PID_FILE")
        if kill -0 "$wpid" 2>/dev/null; then
          kill "$wpid"
          echo "Watchdog (PID: $wpid) stopped"
          rm -f "$PID_FILE"
        else
          echo "Watchdog not running (stale PID file)"
          rm -f "$PID_FILE"
        fi
      else
        echo "Watchdog not running (no PID file)"
      fi
      exit 0
      ;;
    help | --help | -h)
      echo "Usage: $0 [command]"
      echo ""
      echo "Commands:"
      echo " (none) Start watchdog in foreground"
      echo " status Show emacs and watchdog status"
      echo " stop Stop running watchdog"
      echo " help Show this help"
      echo ""
      echo "Environment variables:"
      echo " WATCHDOG_INTERVAL Check interval in seconds (default: 10)"
      echo " WATCHDOG_CPU CPU threshold percentage (default: 85)"
      echo " WATCHDOG_SAMPLES High-CPU samples before restart (default: 3)"
      exit 0
      ;;
  esac

  # Check if already running
  if [[ -f "$PID_FILE" ]]; then
    existing_pid=$(cat "$PID_FILE")
    if ps -p "$existing_pid" >/dev/null 2>&1; then
      echo "Watchdog already running (PID: $existing_pid)"
      echo "Use '$0 stop' to stop it first, or '$0 status' to check status"
      exit 1
    fi
    rm -f "$PID_FILE"
  fi

  # Handle signals gracefully. The trap is armed only now, once this process
  # is certain to become the watchdog. Arming it before argument parsing meant
  # that interrupting a `status` or `stop` invocation ran cleanup, which
  # deleted the PID file of the watchdog that was actually running.
  trap cleanup SIGTERM SIGINT SIGHUP

  main_loop
}

main "$@"