fedac/native/src/audio.h at main · aesthetic.computer/core

aesthetic.computer / core
fork atom
Monorepo for Aesthetic.Computer aesthetic.computer
fork atom
core / fedac / native / src / audio.h
at main 439 lines 22 kB view raw
wrap content
prompt.ac/@jeffrey native: negotiate S32_LE format for SOF speaker PCM (fixes crunchy/quiet) 3hrs ago
6366223f
  1#ifndef AC_AUDIO_H
  2#define AC_AUDIO_H
  3
  4#include <stdint.h>
  5#include <stdio.h>
  6#include <pthread.h>
  7#include "audio-decode.h"
  8
  // Engine-wide audio configuration constants.
  // NOTE(review): the uses of AUDIO_MAX_SAMPLE_SECS and the
  // AUDIO_OUTPUT_HISTORY_* values are not visible in this header — confirm
  // in audio.c.
  #define AUDIO_SAMPLE_RATE 192000          // Hz (presumably the rate requested from ALSA — confirm)
  #define AUDIO_CHANNELS 2
  #define AUDIO_PERIOD_SIZE 192             // ~1ms at 192kHz — minimal latency
  #define AUDIO_MAX_VOICES 32               // capacity of ACAudio.voices[]
  #define AUDIO_WAVEFORM_SIZE 512           // length of ACAudio.waveform_left/right[]
  #define AUDIO_MAX_SAMPLE_VOICES 12        // capacity of ACAudio.sample_voices[]
  #define AUDIO_MAX_SAMPLE_SECS 10
  #define AUDIO_OUTPUT_HISTORY_SECS 12
  #define AUDIO_OUTPUT_HISTORY_RATE 48000
  #define AUDIO_MAX_DECKS 2                 // capacity of ACAudio.decks[]
 19
 // Lifecycle state of one synth voice slot (see ACVoice.state).
 typedef enum {
     VOICE_INACTIVE = 0,   // zero value
     VOICE_ACTIVE,
     VOICE_KILLING         // presumably fading out after audio_kill() — confirm in audio.c
 } VoiceState;
 25
 // Oscillator / generator kind selected for a synth voice (see ACVoice.type).
 typedef enum {
     WAVE_SINE = 0,
     WAVE_TRIANGLE,
     WAVE_SAWTOOTH,
     WAVE_SQUARE,
     WAVE_NOISE,
     WAVE_WHISTLE,   // digital waveguide flute/whistle voice
     WAVE_GUN        // gun voice (see GunModel / GunPreset)
 } WaveType;
 35
 // Gun voice presets. Two synthesis models are available per preset:
 //   GUN_MODEL_CLASSIC  — three-layer kick/snare-style synthesis:
 //     crack (BPF noise burst), boom (sine with downward pitch sweep),
 //     tail (LPF noise with attack-decay). Cheap, predictable, sounds
 //     like a "gun sound effect" the way classic sound libraries do.
 //   GUN_MODEL_PHYSICAL — digital waveguide barrel resonance + body
 //     modes (parallel biquads) + radiation HPF. Physically motivated;
 //     better for cavity-dominated sounds (grenade, RPG).
 // Per-weapon model choice + parameters live in gun_presets[] in audio.c.
 typedef enum {
     GUN_MODEL_CLASSIC = 0,
     GUN_MODEL_PHYSICAL = 1
 } GunModel;
 49
 // Weapon presets selectable for a gun voice. GUN_PRESET_COUNT is a
 // sentinel equal to the number of presets; it is not a playable preset.
 typedef enum {
     GUN_PISTOL = 0,     // 9mm — short barrel, bright crack
     GUN_RIFLE,          // AR/AK — medium barrel + supersonic N-wave
     GUN_SHOTGUN,        // 12ga — wide bore, heavy low-end
     GUN_SMG,            // MP5 — short barrel, fast rattle
     GUN_SUPPRESSED,     // silenced pistol — muffled "pfft"
     GUN_LMG,            // M60 auto-fire — retriggers while held
     GUN_SNIPER,         // .50 cal — huge pressure, long tail
     GUN_GRENADE,        // explosion — low cavity, slow release
     GUN_RPG,            // rocket — long burn, delayed boom
     GUN_RELOAD,         // magazine clack — metallic click
     GUN_COCK,           // bolt cock — two-click (primary + delayed)
     GUN_RICOCHET,       // metallic ping — pitch-drops on release
     GUN_PRESET_COUNT
 } GunPreset;
 65
 66typedef struct {
 67    VoiceState state;
 68    WaveType type;
 69    double phase;           // 0.0-1.0 phase accumulator
 70    double frequency;       // Hz (smoothed toward target)
 71    double target_frequency; // Hz (set by update, smoothed per sample)
 72    double volume;          // 0.0-1.0
 73    double pan;             // -1.0 to 1.0
 74    double attack;          // seconds
 75    double decay;           // seconds (time before end to start fading)
 76    double duration;        // seconds (INFINITY for sustained)
 77    double elapsed;         // seconds since start
 78    double fade_duration;   // for kill(fade)
 79    double fade_elapsed;    // progress through fade
 80    double started_at;      // monotonic time reference
 81    uint64_t id;            // unique voice ID
 82    // Noise filter state
 83    double noise_b0, noise_b1, noise_b2, noise_a1, noise_a2;
 84    double noise_x1, noise_x2, noise_y1, noise_y2;
 85    uint32_t noise_seed;
 86    // Digital waveguide flute/whistle state (Perry Cook STK Flute model)
 87    // See audio.c:generate_whistle_sample for algorithm notes. The bore
 88    // delay line is the primary resonator — its length sets pitch and
 89    // its feedback loop generates all the harmonics. The jet delay +
 90    // cubic nonlinearity drives the loop into sustained oscillation.
 91    double whistle_breath;            // envelope-smoothed breath pressure
 92    double whistle_vibrato_phase;     // 0..1 vibrato LFO phase
 93    double whistle_lp1;               // 1-pole loop LPF state
 94    double whistle_hp_x1, whistle_hp_y1; // 1-pole DC blocker state
 95    // Bore delay line — up to ~2048 samples at 192kHz covers down to ~94 Hz.
 96    // Write cursor advances by 1 each tick; reads use fractional delay
 97    // indexing for smooth pitch.
 98    float whistle_bore_buf[2048];
 99    int whistle_bore_w;
100    // Jet delay line — shorter, models embouchure travel time (~0.32×bore).
101    float whistle_jet_buf[512];
102    int whistle_jet_w;
103    // === Gun DWG state (see generate_gun_sample) ===
104    // Most of these are copied from the preset on note-on; mutable ones
105    // (pressure_env, body_y1/y2, bore_lp, rad_prev) evolve each sample.
106    // The bore delay buffer is shared with `whistle_bore_buf` since a
107    // voice can only be one wave type at a time.
108    int    gun_preset;              // GunPreset index (for debug)
109    double gun_bore_delay;          // samples (= bore_length_s * sr)
110    double gun_bore_loss;           // 1-pole LPF alpha in bore loop
111    double gun_bore_lp;             // LPF state
112    double gun_breech_reflect;      // closed-breech reflection gain (0..1)
113    double gun_pressure;            // excitation peak (weapon power)
114    double gun_pressure_env;        // live excitation envelope 0..1
115    double gun_env_decay_mult;      // per-sample decay multiplier (exp)
116    double gun_noise_gain;          // turbulent gas noise modulation depth
117    double gun_radiation_a;         // muzzle HPF 1-zero coefficient (0..1)
118    double gun_rad_prev;            // HPF previous input
119    // Secondary excitation — fires once more at secondary_trig samples
120    // elapsed. Used for supersonic N-wave (rifle/sniper) and for the
121    // second click of a cock/reload two-click gesture.
122    double gun_secondary_trig;      // sample countdown (<=0 = fired)
123    double gun_secondary_amp;       // relative amplitude of 2nd shot
124    // Sustained fire (LMG) — retrigger the excitation on cadence while
125    // the voice is held (infinite-duration voice, released via kill).
126    int    gun_sustain_fire;
127    double gun_retrig_timer;        // seconds
128    double gun_retrig_period;       // seconds (60 / RPM)
129    // Body mode resonators — 3 parallel biquads excited by same pulse.
130    // Coefficients precomputed from preset on note-on.
131    double gun_body_a1[3], gun_body_a2[3];
132    double gun_body_amp[3];
133    double gun_body_y1[3], gun_body_y2[3];
134    // Pitch sweep (ricochet) — multiplier applied to bore delay (physical)
135    // or to boom freq (classic). When voice enters VOICE_KILLING, target
136    // flips so the bore stretches → doppler drop during release.
137    double gun_pitch_mult;          // current (smoothed)
138    double gun_pitch_target;        // target (set on trigger / release)
139    double gun_pitch_slew;          // per-sample approach rate
140    // === Gun classic-model state (used when gun_model == GUN_MODEL_CLASSIC) ===
141    // Layered synthesis: crack (BPF noise burst, decays via gun_pressure_env
142    // and gun_env_decay_mult, filtered through body[0] biquad), boom (pitched
143    // sine/triangle with exponential pitch sweep + amp decay), tail (LPF noise
144    // with linear attack ramp + exponential decay, filtered through body[1]).
145    int    gun_model;               // GunModel: 0=classic, 1=physical
146    double gun_boom_phase;          // 0..1 oscillator phase
147    double gun_boom_freq;           // current Hz (sweeps toward gun_boom_freq_end)
148    double gun_boom_freq_start;     // Hz at trigger (for LMG sustain-fire retrigger)
149    double gun_boom_freq_end;       // settled Hz (target after pitch sweep)
150    double gun_boom_pitch_mult;     // per-sample geometric approach (closer to 0 = faster)
151    double gun_boom_env;            // amp envelope (decays each sample)
152    double gun_boom_decay_mult;     // per-sample amp decay multiplier
153    double gun_tail_env;            // amp envelope (rises during attack, then decays)
154    double gun_tail_attack_inc;     // per-sample envelope increment during attack (0 = instant)
155    double gun_tail_decay_mult;     // per-sample amp decay multiplier (after attack done)
156    double gun_crack_b0;            // BPF input gain (state lives in body[0])
157    double gun_tail_b0, gun_tail_b1, gun_tail_b2;  // LPF feed-forward coefs (state in body[1])
158    // Click layer — sub-millisecond high-frequency transient. Adds the
159    // "tk" snap to the front of the envelope so the crack reads as
160    // crisp instead of as a shaped noise burst. Layered before crack.
161    double gun_click_env;           // amp envelope (decays each sample)
162    double gun_click_decay_mult;    // per-sample multiplier (typ ~exp(-1/(0.5ms*sr)))
163    double gun_click_amp;           // mix gain
164    double gun_click_prev;          // 1-zero HPF state (white_noise[n-1])
165    // Physical-model excitation state — Friedlander blast wave shape.
166    // `t_samples` counts up from 0 each trigger; the muzzle pulse follows
167    // P(t) = peak·(1−t/t+)·exp(−A·t/t+) for t in [0,t+], then a small
168    // negative phase, then silence. This replaces the old white-noise +
169    // exp-decay excitation with the actual shape of a blast wave.
170    double gun_phys_t;              // samples since last trigger
171    double gun_phys_t_plus;         // positive-phase duration (samples)
172    double gun_phys_friedlander_a;  // decay exponent (typ. 1.5)
173    double gun_phys_neg_amp;        // negative-phase peak (relative)
174    double gun_phys_echo_delay;     // ground-reflection delay (samples)
175    double gun_phys_echo_amp;       // ground-reflection gain
176    double gun_phys_echo_buf[1024]; // small ring for echo tap (~5ms @ 192kHz)
177    int    gun_phys_echo_w;
178} ACVoice;
179
// Playback state for one voice reading from a recorded/loaded sample buffer.
typedef struct {
    int active;
    int loop;               // 1 = loop sample, 0 = one-shot
    double position;        // fractional sample index
    double speed;           // playback rate (1.0 = original pitch)
    double volume;
    double pan;
    double fade;            // 0-1 envelope
    double fade_target;     // 0 = killing, 1 = playing
    uint64_t id;
} SampleVoice;
191
192typedef struct {
193    volatile int     active;        // deck loaded and ready
194    volatile int     playing;       // currently producing audio
195    float            volume;        // 0.0–1.0
196    ACDeckDecoder   *decoder;       // streaming decoder instance
197} ACDeck;
198
199typedef struct {
200    void *pcm;              // snd_pcm_t* (void to avoid header dep)
201    pthread_t thread;
202    volatile int running;
203
204    ACVoice voices[AUDIO_MAX_VOICES];
205    pthread_mutex_t lock;
206
207    uint64_t next_id;
208    double time;            // current audio time
209    uint64_t total_frames;
210
211    // Speaker poll data
212    float waveform_left[AUDIO_WAVEFORM_SIZE];
213    float waveform_right[AUDIO_WAVEFORM_SIZE];
214    float amplitude_left;
215    float amplitude_right;
216    int waveform_pos;
217
218    // BPM / metronome
219    double bpm;
220    double beat_elapsed;
221    volatile int beat_triggered;
222
223    // Effects
224    int room_enabled;
225    float *room_buf_l, *room_buf_r;
226    int room_pos;
227    int room_size;
228    float room_mix;         // 0.0 to 1.0 wet mix (smoothed toward target)
229    float target_room_mix;  // target wet mix (set by JS, smoothed per sample)
230
231    int glitch_enabled;
232    float glitch_hold_l, glitch_hold_r;
233    int glitch_counter;
234    int glitch_rate;        // samples between holds
235    float glitch_mix;       // 0.0 = clean, 1.0 = full sample-hold + bitcrush
236    float target_glitch_mix;// target mix from JS, smoothed per sample
237
238    // FX mix: dry/wet blend for entire FX chain (reverb + glitch)
239    float fx_mix;           // 0.0 = fully dry, 1.0 = fully wet (smoothed)
240    float target_fx_mix;    // target (set by JS, smoothed per sample)
241
242    // System mixer volume (0-100 percent)
243    int system_volume;
244    int card_index;  // ALSA card number (0 or 1)
245    unsigned int actual_rate;    // Negotiated ALSA sample rate (may differ from requested)
246    unsigned int actual_period;  // Negotiated ALSA period size in frames
247    int use_s32;                 // 1 if PCM negotiated S32_LE (SOF boards), 0 for S16_LE
248
249    // TTS PCM buffer (resampled to output rate, mono → stereo in mix)
250    float *tts_buf;             // ring buffer of mono float samples at output rate
251    volatile int tts_write_pos; // producer (tts thread) writes here
252    volatile int tts_read_pos;  // consumer (audio thread) reads here
253    int tts_buf_size;           // ring buffer size
254    float tts_volume;           // 0.0-1.0
255    float tts_fade;             // 0.0-1.0 per-sample envelope (prevents click on start/stop)
256
257    // Microphone capture + sample playback
258    float *sample_buf;          // recorded sample (mono, at capture rate) — audio thread reads
259    float *sample_buf_back;     // back buffer for double-buffering — JS thread writes here
260    volatile int sample_len;    // length in samples (0 = no sample)
261    int sample_max_len;         // buffer capacity
262    unsigned int sample_rate;   // capture sample rate (for speed calc)
263    volatile int recording;     // 1 = buffering mic input to sample_buf
264    volatile int sample_write_pos; // write cursor during recording
265    volatile int mic_connected; // 1 = capture device currently open
266    volatile int mic_hot;       // 1 = hot-mic thread running (device stays open)
267    volatile float mic_level;   // raw peak level (0.0-1.0) per chunk
268    volatile int mic_last_chunk;// last captured frame count
269    char mic_device[64];        // active ALSA capture device string
270    char mic_last_error[128];   // last capture error message
271    pthread_t capture_thread;
272    volatile int capture_thread_running; // 1 while capture thread is alive
273
274    // Continuous capture ring buffer (always written by capture thread)
275    float *mic_ring;            // ring buffer, same capacity as sample_buf
276    volatile int mic_ring_pos;  // monotonic write position (mod sample_max_len)
277    volatile int rec_start_ring_pos; // ring position when recording started
278
279    // Live mic waveform ring buffer (for visualization)
280    #define MIC_WAVEFORM_SIZE 128
281    float mic_waveform[128];    // circular buffer of recent samples (downsampled)
282    volatile int mic_waveform_pos; // write position in ring
283    SampleVoice sample_voices[AUDIO_MAX_SAMPLE_VOICES];
284    uint64_t sample_next_id;
285
286    // Dedicated global replay voice/buffer so reverse playback does not
287    // steal or overwrite the regular sample bank.
288    float *replay_buf;
289    float *replay_buf_back;
290    volatile int replay_len;
291    int replay_max_len;
292    unsigned int replay_rate;
293    SampleVoice replay_voice;
294
295    // Recent rendered-output history for true reverse replay.
296    float *output_history_buf;         // mono output ring tapped before room/glitch/TTS
297    int output_history_size;           // ring capacity in samples
298    unsigned int output_history_rate;  // capture rate exposed to JS
299    unsigned int output_history_downsample_n;   // output-rate -> history-rate stride
300    unsigned int output_history_downsample_pos; // current stride counter
301    uint64_t output_history_write_pos;          // monotonic write position
302
303    // DJ deck audio (persistent across piece switches)
304    ACDeck decks[AUDIO_MAX_DECKS];
305    float crossfader;           // 0.0 = deck A, 1.0 = deck B
306    float deck_master_volume;   // overall deck volume (default 0.8)
307
308    // Parallel headphone PCM (sof-rt5682+max98360a auto-route).
309    // Open in addition to the main speaker PCM so both the SSP0 (RT5682
310    // headset) and SSP1 (MAX98360A speaker) DAIs receive the same audio
311    // stream. The codec's DAPM jack-sense mutes the inactive side, so
312    // unplugged → speaker plays, headphones plugged → headphone plays.
313    void *headphone_pcm;        // snd_pcm_t* for headphone PCM (NULL if same as main)
314
315    // HDMI audio output (secondary, low-pass filtered clone)
316    void *hdmi_pcm;             // snd_pcm_t* for HDMI audio device (NULL if not found)
317    unsigned int hdmi_rate;     // negotiated HDMI sample rate
318    int hdmi_downsample_n;      // primary_rate / hdmi_rate (round)
319    int hdmi_downsample_pos;    // counter for downsampling
320    float hdmi_lp_l, hdmi_lp_r; // LP filter state (simple 1-pole IIR)
321    int16_t hdmi_period[512*2]; // interleaved S16 staging buffer
322    int hdmi_period_pos;        // samples written so far
323    int hdmi_period_size;       // target period size in frames
324
325    // Recording tap: if set, called after each mixed period with final int16 PCM
326    void (*rec_callback)(const int16_t *pcm, int frames, void *userdata);
327    void *rec_userdata;
328
329    // Diagnostic info (exposed to JS via system.hw)
330    char audio_device[32];      // ALSA device name that opened successfully
331    char audio_status[64];      // human-readable status ("ok", "no card", etc.)
332    int audio_init_retries;     // how many devices we tried before success
333} ACAudio;
334
335// Initialize ALSA audio engine (returns NULL if no audio device)
336ACAudio *audio_init(void);
337
338// Add a new voice, returns voice ID
339uint64_t audio_synth(ACAudio *audio, WaveType type, double freq,
340                     double duration, double volume, double attack,
341                     double decay, double pan);
342
343// Add a new gun voice with a specific preset (applies DWG parameters).
344// `volume` scales the output, `pan` places it in stereo. `duration` is
345// normally INFINITY for held guns (LMG sustain fire) or finite for
346// one-shots; the internal DWG excitation handles the bang envelope.
347// `force_model` overrides the preset's default GunModel: pass -1 to use
348// the preset's choice, 0 to force CLASSIC (3-layer synthesis), or 1 to
349// force PHYSICAL (DWG bore + body modes). Lets the same weapon be A/B-
350// compared between models without separate presets.
351uint64_t audio_synth_gun(ACAudio *audio, GunPreset preset, double duration,
352                         double volume, double attack, double decay,
353                         double pan, double pressure_scale, int force_model);
354
355// Override one preset-derived parameter on a freshly-created gun voice.
356// Call between audio_synth_gun() and the next audio thread tick to
357// retune the next shot. Unknown keys are ignored. Used by the inspector
358// drag-to-edit cards. Key names match the gun_presets[] field names.
359void audio_gun_voice_set_param(ACAudio *audio, uint64_t id,
360                               const char *key, double value);
361
362// Kill a voice with fade
363void audio_kill(ACAudio *audio, uint64_t id, double fade);
364
365// Update a voice's parameters
366void audio_update(ACAudio *audio, uint64_t id, double freq,
367                  double volume, double pan);
368
369// Check if beat was triggered (and clear flag)
370int audio_beat_check(ACAudio *audio);
371
372// Set BPM
373void audio_set_bpm(ACAudio *audio, double bpm);
374
375// Toggle effects
376void audio_room_toggle(ACAudio *audio);
377void audio_glitch_toggle(ACAudio *audio);
378void audio_set_room_mix(ACAudio *audio, float mix);
379void audio_set_glitch_mix(ACAudio *audio, float mix);
380void audio_set_fx_mix(ACAudio *audio, float mix);
381
382// Microphone — hot-mic mode (device stays open, recording toggles buffering)
383int audio_mic_open(ACAudio *audio);    // open device + start hot-mic thread
384void audio_mic_close(ACAudio *audio);  // stop thread + close device
385int audio_mic_start(ACAudio *audio);   // begin buffering (instant, no device open)
386int audio_mic_stop(ACAudio *audio);    // stop buffering, returns sample length
387
388// Sample playback with pitch shifting (loop=1 for infinite loop, 0 for one-shot)
389uint64_t audio_sample_play(ACAudio *audio, double freq, double base_freq,
390                           double volume, double pan, int loop);
391void audio_sample_kill(ACAudio *audio, uint64_t id, double fade);
392void audio_sample_update(ACAudio *audio, uint64_t id, double freq,
393                         double base_freq, double volume, double pan);
394void audio_replay_load_data(ACAudio *audio, const float *data, int len, unsigned int rate);
395uint64_t audio_replay_play(ACAudio *audio, double freq, double base_freq,
396                           double volume, double pan, int loop);
397void audio_replay_kill(ACAudio *audio, uint64_t id, double fade);
398void audio_replay_update(ACAudio *audio, uint64_t id, double freq,
399                         double base_freq, double volume, double pan);
400
401// Sample bank: get/load data for per-key sample storage
402int audio_sample_get_data(ACAudio *audio, float *out, int max_len);
403void audio_sample_load_data(ACAudio *audio, const float *data, int len, unsigned int rate);
404int audio_output_get_recent(ACAudio *audio, float *out, int max_len, unsigned int *out_rate);
405
406// Adjust system volume: delta is -5 to +5 (percentage points), 0 = toggle mute
407void audio_volume_adjust(ACAudio *audio, int delta);
408
409// Play a short boot beep (immediately after audio init)
410void audio_boot_beep(ACAudio *audio);
411
412// Play a ready melody (when piece is loaded and ready to play)
413void audio_prewarm(ACAudio *audio);
414void audio_ready_melody(ACAudio *audio);
415
416// Play a shutdown sound (before cleanup)
417void audio_shutdown_sound(ACAudio *audio);
418
419// Sample persistence (save/load to disk)
420int audio_sample_save(ACAudio *audio, const char *path);
421int audio_sample_load(ACAudio *audio, const char *path);
422
423// DJ deck control
424int audio_deck_load(ACAudio *audio, int deck, const char *path);
425void audio_deck_play(ACAudio *audio, int deck);
426void audio_deck_pause(ACAudio *audio, int deck);
427void audio_deck_seek(ACAudio *audio, int deck, double seconds);
428void audio_deck_set_speed(ACAudio *audio, int deck, double speed);
429void audio_deck_set_volume(ACAudio *audio, int deck, float vol);
430void audio_deck_set_crossfader(ACAudio *audio, float value);
431void audio_deck_set_master_volume(ACAudio *audio, float value);
432
433// Cleanup
434void audio_destroy(ACAudio *audio);
435
// Convert note name to frequency
// NOTE(review): the accepted note-name syntax and the result for an
// unrecognized name are not stated in this header — confirm in audio.c.
double audio_note_to_freq(const char *note);
438
439#endif