linux observer
1# SPDX-License-Identifier: AGPL-3.0-only
2# Copyright (c) 2026 sol pbc
3
4"""Audio recording for Linux desktop observer.
5
6Extracted from solstone's observe/hear.py — AudioRecorder class only.
7load_transcript() and format_audio() remain in solstone core (used by 15+ files).
8
9Changes from monorepo version:
10- Replaces `from observe.detect import input_detect` with local audio_detect
11- Replaces conditional `think.callosum` import with local logging
12- Defines SAMPLE_RATE locally (was from observe.utils)
13"""
14
15from __future__ import annotations
16
17import gc
18import io
19import logging
20import os
21import signal
22import threading
23import time
24from queue import Queue
25
26import numpy as np
27import soundfile as sf
28
# Module-level logger; every AudioRecorder method reports through it.
logger = logging.getLogger(__name__)

# Standard sample rate for audio processing (samples per second).
# Passed as `samplerate` to both device recorders and to the FLAC writer.
SAMPLE_RATE = 16000
# Frames requested per recorder read; also passed as the recorder `blocksize`.
BLOCK_SIZE = 1024
34
35
class AudioRecorder:
    """Records stereo audio from microphone and system audio.

    A background thread (see ``start_recording``) reads fixed-size blocks
    from two capture devices and pushes them onto ``audio_queue`` as
    two-column arrays (column 0 = microphone, column 1 = system audio).
    The consumer drains the queue with ``get_buffers`` and can encode the
    result with ``create_flac_bytes`` / ``create_mono_flac_bytes``.

    Typical call order: ``detect()`` -> ``start_recording()`` ->
    ``get_buffers()`` (repeatedly) -> ``stop_recording()``.
    """

    def __init__(self):
        # Queue holds stereo chunks (mic=left, sys=right)
        self.audio_queue = Queue()
        self._running = True
        self.recording_thread = None
        # Filled in by detect(); kept as None until then so that a missing
        # detect() call is recognisable instead of raising AttributeError.
        self.mic_device = None
        self.sys_device = None

    def detect(self) -> bool:
        """Detect microphone and system audio devices.

        Returns:
            True when both devices were found and stored on the instance,
            False (after logging an error) when either one is missing.
        """
        from .audio_detect import input_detect

        mic, loopback = input_detect()
        if mic is None or loopback is None:
            logger.error(f"Detection failed: mic {mic} sys {loopback}")
            return False
        logger.info(f"Detected microphone: {mic.name}")
        logger.info(f"Detected system audio: {loopback.name}")
        self.mic_device = mic
        self.sys_device = loopback
        return True

    def _abort_on_format_error(self, error, mic_chunk, sys_chunk) -> None:
        """Log a fatal chunk-format error and ask the process to shut down."""
        error_msg = f"Fatal audio format error: {error}"
        logger.error(
            f"{error_msg} - triggering clean shutdown\n"
            f" mic_chunk type={type(mic_chunk)}, "
            f"shape={getattr(mic_chunk, 'shape', 'N/A')}, "
            f"dtype={getattr(mic_chunk, 'dtype', 'N/A')}\n"
            f" sys_chunk type={type(sys_chunk)}, "
            f"shape={getattr(sys_chunk, 'shape', 'N/A')}, "
            f"dtype={getattr(sys_chunk, 'dtype', 'N/A')}"
        )
        # Stop recording thread and trigger shutdown
        self._running = False
        os.kill(os.getpid(), signal.SIGTERM)

    def record_both(self) -> None:
        """Record from both mic and system audio in a loop.

        Runs until ``_running`` becomes False. Both recorders are reopened
        after every 1000 successfully queued blocks, followed by an explicit
        garbage collection (presumably to bound resource growth in the
        capture backend — TODO confirm). Errors while reading are logged and
        retried after a short sleep; a chunk-format error that prevents
        building the stereo block sends SIGTERM to the current process and
        ends the loop.
        """
        while self._running:
            if self.mic_device is None or self.sys_device is None:
                # detect() has not succeeded yet; keep the original retry
                # cadence but say what is actually wrong.
                logger.error(
                    "Error setting up recorders: audio devices not detected "
                    "(call detect() first)"
                )
                if self._running:
                    time.sleep(1)
                continue

            try:
                with (
                    self.mic_device.recorder(
                        samplerate=SAMPLE_RATE, channels=[-1], blocksize=BLOCK_SIZE
                    ) as mic_rec,
                    self.sys_device.recorder(
                        samplerate=SAMPLE_RATE, channels=[-1], blocksize=BLOCK_SIZE
                    ) as sys_rec,
                ):
                    block_count = 0
                    while self._running and block_count < 1000:
                        try:
                            mic_chunk = mic_rec.record(numframes=BLOCK_SIZE)
                            sys_chunk = sys_rec.record(numframes=BLOCK_SIZE)

                            # Basic validation
                            if mic_chunk is None or mic_chunk.size == 0:
                                logger.warning("Empty microphone buffer")
                                continue
                            if sys_chunk is None or sys_chunk.size == 0:
                                logger.warning("Empty system buffer")
                                continue

                            try:
                                stereo_chunk = np.column_stack((mic_chunk, sys_chunk))
                                self.audio_queue.put(stereo_chunk)
                                block_count += 1
                            except (TypeError, ValueError, AttributeError) as e:
                                self._abort_on_format_error(e, mic_chunk, sys_chunk)
                                return
                        except Exception as e:
                            logger.error(f"Error recording audio: {e}")
                            if not self._running:
                                break
                            time.sleep(0.5)
                # Drop the recorder references before reopening them.
                del mic_rec, sys_rec
                gc.collect()
            except Exception as e:
                logger.error(f"Error setting up recorders: {e}")
                if self._running:
                    time.sleep(1)

    def get_buffers(self) -> np.ndarray:
        """Return concatenated stereo audio data from the queue.

        Drains every chunk currently queued, replaces NaN with 0.0 and
        +/-inf with +/-1e10, and stacks the chunks row-wise in arrival
        order.

        Returns:
            A two-column array. When nothing usable was queued the result
            is an empty float32 array of shape (0, 2) and a warning is
            logged.
        """
        from queue import Empty

        # Seed with an empty (0, 2) float32 block so the result keeps the
        # same shape/dtype as before even when the queue is empty.
        chunks = [np.array([], dtype=np.float32).reshape(0, 2)]

        while True:
            try:
                # Non-blocking: never wait for data that is not there yet.
                stereo_chunk = self.audio_queue.get_nowait()
            except Empty:
                break

            if stereo_chunk is None or stereo_chunk.size == 0:
                logger.warning("Queue contained empty chunk")
                continue

            # Clean the data
            chunks.append(
                np.nan_to_num(stereo_chunk, nan=0.0, posinf=1e10, neginf=-1e10)
            )

        # One stack at the end instead of re-copying the buffer per chunk.
        stereo_buffer = np.vstack(chunks)

        if stereo_buffer.size == 0:
            logger.warning("No valid audio data retrieved from queue")

        return stereo_buffer

    @staticmethod
    def _to_int16(samples: np.ndarray) -> np.ndarray:
        """Convert float samples to int16, clipping to the range [-1.0, 1.0]."""
        # NaN has no defined int16 value; map it to silence before scaling.
        cleaned = np.nan_to_num(samples, nan=0.0)
        return (np.clip(cleaned, -1.0, 1.0) * 32767).astype(np.int16)

    @staticmethod
    def _write_flac(audio_data: np.ndarray) -> bytes:
        """Encode int16 samples as FLAC at SAMPLE_RATE and return the bytes."""
        buf = io.BytesIO()
        sf.write(buf, audio_data, SAMPLE_RATE, format="FLAC")
        return buf.getvalue()

    def create_flac_bytes(self, stereo_data: np.ndarray) -> bytes:
        """Create FLAC bytes from stereo audio data.

        Returns:
            The encoded FLAC stream, or ``b""`` when the input is None or
            empty, or when encoding fails (the failure is logged).
        """
        if stereo_data is None or stereo_data.size == 0:
            logger.warning("Audio data is empty. Returning empty bytes.")
            return b""

        audio_data = self._to_int16(stereo_data)
        try:
            return self._write_flac(audio_data)
        except Exception as e:
            logger.error(
                f"Error creating FLAC: {e}. Audio data shape: {audio_data.shape}, dtype: {audio_data.dtype}"
            )
            return b""

    def create_mono_flac_bytes(self, mono_data: np.ndarray) -> bytes:
        """Create FLAC bytes from mono audio data.

        Returns:
            The encoded FLAC stream, or ``b""`` when the input is None or
            empty, or when encoding fails (the failure is logged).
        """
        if mono_data is None or mono_data.size == 0:
            logger.warning("Mono audio data is empty. Returning empty bytes.")
            return b""

        audio_data = self._to_int16(mono_data)
        try:
            return self._write_flac(audio_data)
        except Exception as e:
            logger.error(
                f"Error creating mono FLAC: {e}. Audio shape: {audio_data.shape}"
            )
            return b""

    def start_recording(self) -> None:
        """Start the recording thread.

        The thread is a daemon running ``record_both``; it is stored on
        ``recording_thread``.
        """
        self._running = True
        self.recording_thread = threading.Thread(target=self.record_both, daemon=True)
        self.recording_thread.start()

    def stop_recording(self) -> None:
        """Stop the recording thread.

        Clears the run flag and waits up to two seconds for the thread to
        finish; a thread that is still alive after that is reported.
        """
        self._running = False
        if self.recording_thread:
            self.recording_thread.join(timeout=2.0)
            if self.recording_thread.is_alive():
                # The loop only checks the flag between blocking reads.
                logger.warning("Recording thread did not stop within 2.0 seconds")