src/solstone_linux/audio_recorder.py at main

solpbc.org / solstone-linux
fork atom
linux observer
fork atom
solstone-linux / src / solstone_linux / audio_recorder.py
at main 186 lines 7.2 kB view raw
wrap content
Jer Miller Initial solstone-linux package — standalone Linux desktop observer 6d ago
e56c8e13
  1# SPDX-License-Identifier: AGPL-3.0-only
  2# Copyright (c) 2026 sol pbc
  3
  4"""Audio recording for Linux desktop observer.
  5
  6Extracted from solstone's observe/hear.py — AudioRecorder class only.
  7load_transcript() and format_audio() remain in solstone core (used by 15+ files).
  8
  9Changes from monorepo version:
 10- Replaces `from observe.detect import input_detect` with local audio_detect
 11- Replaces conditional `think.callosum` import with local logging
 12- Defines SAMPLE_RATE locally (was from observe.utils)
 13"""
 14
 15from __future__ import annotations
 16
 17import gc
 18import io
 19import logging
 20import os
 21import signal
 22import threading
 23import time
 24from queue import Queue
 25
 26import numpy as np
 27import soundfile as sf
 28
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Standard sample rate for audio processing; passed as `samplerate` to the
# device recorders and to the FLAC writer.
SAMPLE_RATE = 16000
# Frames requested per record() call; also passed as the recorder `blocksize`.
BLOCK_SIZE = 1024
 34
 35
 36class AudioRecorder:
 37    """Records stereo audio from microphone and system audio."""
 38
 39    def __init__(self):
 40        # Queue holds stereo chunks (mic=left, sys=right)
 41        self.audio_queue = Queue()
 42        self._running = True
 43        self.recording_thread = None
 44
 45    def detect(self):
 46        """Detect microphone and system audio devices."""
 47        from .audio_detect import input_detect
 48
 49        mic, loopback = input_detect()
 50        if mic is None or loopback is None:
 51            logger.error(f"Detection failed: mic {mic} sys {loopback}")
 52            return False
 53        logger.info(f"Detected microphone: {mic.name}")
 54        logger.info(f"Detected system audio: {loopback.name}")
 55        self.mic_device = mic
 56        self.sys_device = loopback
 57        return True
 58
 59    def record_both(self):
 60        """Record from both mic and system audio in a loop."""
 61        while self._running:
 62            try:
 63                with (
 64                    self.mic_device.recorder(
 65                        samplerate=SAMPLE_RATE, channels=[-1], blocksize=BLOCK_SIZE
 66                    ) as mic_rec,
 67                    self.sys_device.recorder(
 68                        samplerate=SAMPLE_RATE, channels=[-1], blocksize=BLOCK_SIZE
 69                    ) as sys_rec,
 70                ):
 71                    block_count = 0
 72                    while self._running and block_count < 1000:
 73                        try:
 74                            mic_chunk = mic_rec.record(numframes=BLOCK_SIZE)
 75                            sys_chunk = sys_rec.record(numframes=BLOCK_SIZE)
 76
 77                            # Basic validation
 78                            if mic_chunk is None or mic_chunk.size == 0:
 79                                logger.warning("Empty microphone buffer")
 80                                continue
 81                            if sys_chunk is None or sys_chunk.size == 0:
 82                                logger.warning("Empty system buffer")
 83                                continue
 84
 85                            try:
 86                                stereo_chunk = np.column_stack((mic_chunk, sys_chunk))
 87                                self.audio_queue.put(stereo_chunk)
 88                                block_count += 1
 89                            except (TypeError, ValueError, AttributeError) as e:
 90                                error_msg = f"Fatal audio format error: {e}"
 91                                logger.error(
 92                                    f"{error_msg} - triggering clean shutdown\n"
 93                                    f"  mic_chunk type={type(mic_chunk)}, "
 94                                    f"shape={getattr(mic_chunk, 'shape', 'N/A')}, "
 95                                    f"dtype={getattr(mic_chunk, 'dtype', 'N/A')}\n"
 96                                    f"  sys_chunk type={type(sys_chunk)}, "
 97                                    f"shape={getattr(sys_chunk, 'shape', 'N/A')}, "
 98                                    f"dtype={getattr(sys_chunk, 'dtype', 'N/A')}"
 99                                )
100                                # Stop recording thread and trigger shutdown
101                                self._running = False
102                                os.kill(os.getpid(), signal.SIGTERM)
103                                return
104                        except Exception as e:
105                            logger.error(f"Error recording audio: {e}")
106                            if not self._running:
107                                break
108                            time.sleep(0.5)
109                del mic_rec, sys_rec
110                gc.collect()
111            except Exception as e:
112                logger.error(f"Error setting up recorders: {e}")
113                if self._running:
114                    time.sleep(1)
115
116    def get_buffers(self) -> np.ndarray:
117        """Return concatenated stereo audio data from the queue."""
118        stereo_buffer = np.array([], dtype=np.float32).reshape(0, 2)
119
120        while not self.audio_queue.empty():
121            stereo_chunk = self.audio_queue.get()
122
123            if stereo_chunk is None or stereo_chunk.size == 0:
124                logger.warning("Queue contained empty chunk")
125                continue
126
127            # Clean the data
128            stereo_chunk = np.nan_to_num(
129                stereo_chunk, nan=0.0, posinf=1e10, neginf=-1e10
130            )
131            stereo_buffer = np.vstack((stereo_buffer, stereo_chunk))
132
133        if stereo_buffer.size == 0:
134            logger.warning("No valid audio data retrieved from queue")
135
136        return stereo_buffer
137
138    def create_flac_bytes(self, stereo_data: np.ndarray) -> bytes:
139        """Create FLAC bytes from stereo audio data."""
140        if stereo_data is None or stereo_data.size == 0:
141            logger.warning("Audio data is empty. Returning empty bytes.")
142            return b""
143
144        audio_data = (np.clip(stereo_data, -1.0, 1.0) * 32767).astype(np.int16)
145
146        buf = io.BytesIO()
147        try:
148            sf.write(buf, audio_data, SAMPLE_RATE, format="FLAC")
149        except Exception as e:
150            logger.error(
151                f"Error creating FLAC: {e}. Audio data shape: {audio_data.shape}, dtype: {audio_data.dtype}"
152            )
153            return b""
154
155        return buf.getvalue()
156
157    def create_mono_flac_bytes(self, mono_data: np.ndarray) -> bytes:
158        """Create FLAC bytes from mono audio data."""
159        if mono_data is None or mono_data.size == 0:
160            logger.warning("Mono audio data is empty. Returning empty bytes.")
161            return b""
162
163        audio_data = (np.clip(mono_data, -1.0, 1.0) * 32767).astype(np.int16)
164
165        buf = io.BytesIO()
166        try:
167            sf.write(buf, audio_data, SAMPLE_RATE, format="FLAC")
168        except Exception as e:
169            logger.error(
170                f"Error creating mono FLAC: {e}. Audio shape: {audio_data.shape}"
171            )
172            return b""
173
174        return buf.getvalue()
175
176    def start_recording(self):
177        """Start the recording thread."""
178        self._running = True
179        self.recording_thread = threading.Thread(target=self.record_both, daemon=True)
180        self.recording_thread.start()
181
182    def stop_recording(self):
183        """Stop the recording thread."""
184        self._running = False
185        if self.recording_thread:
186            self.recording_thread.join(timeout=2.0)