#!/usr/bin/env -S uv run --script --quiet
"""Utility CLI for generating synthetic audio snippets with ffmpeg.

Examples:
    # 3 second stereo sine wave @ 440 Hz written to tmp.wav
    uv run scripts/generate_audio_sample.py tmp.wav --duration 3

    # pink noise bed with gentle fades
    uv run scripts/generate_audio_sample.py pink.wav --waveform noise \\
        --noise-color pink --duration 15 --fade-in 1.5 --fade-out 2

    # chord built from multiple partials
    uv run scripts/generate_audio_sample.py chord.wav --partials 660 \\
        --partials 880:0.35 --duration 5 --frequency 440 --amplitude 0.6
"""

from __future__ import annotations

import argparse
import math
import shlex
import shutil
import subprocess
import sys
from collections.abc import Iterable, Sequence
from pathlib import Path

# Values accepted by ffmpeg's ``-loglevel`` option, offered as --log-level choices.
LOG_LEVELS = [
    "quiet",
    "panic",
    "fatal",
    "error",
    "warning",
    "info",
    "verbose",
    "debug",
    "trace",
]


def positive_float(value: str) -> float:
    """Parse *value* as a finite float strictly greater than zero.

    Raises:
        ValueError: if *value* is not a valid float literal.
        argparse.ArgumentTypeError: if the number is not finite or is <= 0.
    """
    num = float(value)
    # NaN compares False against everything, so it must be rejected explicitly.
    if not math.isfinite(num) or num <= 0:
        raise argparse.ArgumentTypeError(
            f"value must be a finite number > 0 (got {value})"
        )
    return num


def non_negative_float(value: str) -> float:
    """Parse *value* as a finite float greater than or equal to zero.

    Raises:
        ValueError: if *value* is not a valid float literal.
        argparse.ArgumentTypeError: if the number is not finite or is < 0.
    """
    num = float(value)
    if not math.isfinite(num) or num < 0:
        raise argparse.ArgumentTypeError(
            f"value must be a finite number >= 0 (got {value})"
        )
    return num


def amplitude_value(value: str) -> float:
    """Parse *value* as an amplitude in the half-open range (0, 1].

    Raises:
        ValueError: if *value* is not a valid float literal.
        argparse.ArgumentTypeError: if the number lies outside (0, 1].
    """
    num = float(value)
    # The chained comparison is False for NaN, so NaN is rejected here too.
    if not 0 < num <= 1:
        raise argparse.ArgumentTypeError(
            f"amplitude must be in the range (0, 1] (got {value})"
        )
    return num


def positive_int(value: str) -> int:
    """Parse *value* as an integer strictly greater than zero.

    Raises:
        ValueError: if *value* is not a valid integer literal.
        argparse.ArgumentTypeError: if the number is <= 0.
    """
    num = int(value)
    if num <= 0:
        raise argparse.ArgumentTypeError(f"value must be > 0 (got {value})")
    return num


def parse_partial(value: str) -> tuple[float, float]:
    """Parse a ``FREQ[:LEVEL]`` specification into ``(frequency, weight)``.

    The weight defaults to ``1.0`` when the ``:LEVEL`` part is missing or empty.

    Raises:
        ValueError: if either part is not a valid float literal.
        argparse.ArgumentTypeError: if the frequency or the weight is not a
            finite number greater than zero.
    """
    freq_part, _, level_part = value.partition(":")
    freq = positive_float(freq_part)
    level = float(level_part) if level_part else 1.0
    if not math.isfinite(level) or level <= 0:
        raise argparse.ArgumentTypeError("partial weight must be > 0")
    return freq, level


def parse_tag_pairs(pairs: Sequence[str]) -> list[tuple[str, str]]:
    """Split ``KEY=VALUE`` strings into ``(key, value)`` tuples.

    Only the first ``=`` separates key from value, so values may themselves
    contain ``=``. Keys and values are stripped of surrounding whitespace.

    Raises:
        argparse.ArgumentTypeError: if an entry has no ``=`` or an empty key.
    """
    parsed: list[tuple[str, str]] = []
    for pair in pairs:
        if "=" not in pair:
            raise argparse.ArgumentTypeError(
                f"metadata tags must look like KEY=VALUE (got {pair})"
            )
        key, value = pair.split("=", 1)
        key = key.strip()
        if not key:
            raise argparse.ArgumentTypeError("metadata key cannot be empty")
        parsed.append((key, value.strip()))
    return parsed


def wave_expression(waveform: str, frequency: float) -> str:
    """Return an expression in the variable ``t`` for one periodic waveform.

    The expression uses the function names ``sin``, ``gt``, ``abs`` and
    ``floor`` and the constant ``PI``; it is embedded into an ``aevalsrc``
    filter by ``build_tone_filtergraph``. Every waveform stays within
    ``[-1, 1]``.

    Raises:
        ValueError: if *waveform* is not sine, square, triangle or saw.
    """
    angular = f"2*PI*{frequency}*t"
    # Phase measured in cycles; its fractional part drives triangle and saw.
    base = f"t*{frequency}"

    if waveform == "sine":
        return f"sin({angular})"
    if waveform == "square":
        # +1 while the sine is positive, -1 otherwise.
        return f"(gt(sin({angular}),0)*2-1)"
    if waveform == "triangle":
        # 4*|x - floor(x + 3/4) + 1/4| - 1 starts at 0, peaks at +1 a quarter
        # cycle in and reaches -1 at three quarters, mirroring the sine phase.
        return f"(4*abs(({base})-floor({base}+0.75)+0.25)-1)"
    if waveform == "saw":
        # Rising ramp from -1 to +1 that wraps every cycle.
        return f"(2*((({base})-floor({base}+0.5))))"
    raise ValueError(f"unsupported waveform {waveform}")


def build_tone_filtergraph(
    waveform: str,
    frequency: float,
    duration: float,
    sample_rate: int,
    amplitude: float,
    partials: Iterable[tuple[float, float]],
) -> str:
    """Build an ``aevalsrc`` source description for a periodic tone.

    For the sine waveform the fundamental (weight 1.0) and every
    ``(frequency, weight)`` pair in *partials* are summed and divided by the
    total weight, so the mix stays within ``[-1, 1]`` before *amplitude* is
    applied.

    Raises:
        ValueError: if partials are supplied for a non-sine waveform, or the
            waveform is unsupported.
    """
    # Materialise once so a one-shot iterator survives the validation check.
    partial_list = list(partials)
    if waveform != "sine" and partial_list:
        raise ValueError("--partials are only supported when waveform is 'sine'")

    expr = wave_expression(waveform, frequency)

    if waveform == "sine":
        terms: list[tuple[str, float]] = [(expr, 1.0)]
        terms.extend(
            (wave_expression("sine", freq), weight) for freq, weight in partial_list
        )
        total_weight = sum(weight for _, weight in terms)
        combined = "+".join(f"{weight}*({term})" for term, weight in terms)
        expr = f"({combined})/{total_weight}"

    expr = f"{amplitude}*({expr})"
    return f"aevalsrc=exprs='{expr}':s={sample_rate}:d={duration}"


def build_noise_filtergraph(
    color: str, duration: float, sample_rate: int, amplitude: float
) -> str:
    """Build an ``anoisesrc`` source description for coloured noise."""
    return (
        "anoisesrc="
        f"color={color}:"
        f"sample_rate={sample_rate}:"
        f"duration={duration}:"
        f"amplitude={amplitude}"
    )


def apply_fades(graph: str, duration: float, fade_in: float, fade_out: float) -> str:
    """Append ``afade`` filters to *graph* for any non-zero fade length.

    The fade-out starts at ``duration - fade_out``, clamped to zero.
    """
    filters = [graph]
    if fade_in > 0:
        filters.append(f"afade=t=in:ss=0:d={fade_in}")
    if fade_out > 0:
        start = max(duration - fade_out, 0)
        filters.append(f"afade=t=out:st={start}:d={fade_out}")
    return ",".join(filters)


def build_ffmpeg_command(
    lavfi_graph: str,
    output: Path,
    duration: float,
    sample_rate: int,
    channels: int,
    log_level: str,
    force: bool,
    metadata: Sequence[tuple[str, str]],
) -> list[str]:
    """Assemble the ffmpeg argument list that renders *lavfi_graph* to *output*.

    The result is a list suitable for ``subprocess.run`` without a shell.
    ``-y`` is passed when *force* is true, ``-n`` otherwise.
    """
    cmd: list[str] = [
        "ffmpeg",
        "-hide_banner",
        "-loglevel",
        log_level,
        "-f",
        "lavfi",
        "-i",
        lavfi_graph,
        "-t",
        f"{duration}",
        "-ar",
        str(sample_rate),
        "-ac",
        str(channels),
    ]

    for key, value in metadata:
        cmd.extend(["-metadata", f"{key}={value}"])

    cmd.append("-y" if force else "-n")
    cmd.append(str(output))
    return cmd


def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser with every supported option."""
    parser = argparse.ArgumentParser(
        description="Generate synthetic audio files via ffmpeg."
    )
    parser.add_argument(
        "output",
        type=Path,
        help="Path for the rendered audio (extension dictates format).",
    )
    parser.add_argument(
        "--waveform",
        choices=["sine", "square", "triangle", "saw", "noise"],
        default="sine",
    )
    parser.add_argument(
        "--duration", type=positive_float, default=5.0, help="Audio length in seconds."
    )
    parser.add_argument(
        "--frequency",
        type=positive_float,
        default=440.0,
        help="Fundamental frequency in Hz.",
    )
    parser.add_argument(
        "--partials",
        metavar="FREQ[:LEVEL]",
        # Parsing here lets argparse turn a malformed value into a usage error.
        type=parse_partial,
        action="append",
        default=[],
        help="Additional sine components (only for sine waveform). Repeatable.",
    )
    parser.add_argument(
        "--noise-color",
        choices=["white", "pink", "brown", "blue"],
        default="white",
        help="Color to use when waveform=noise.",
    )
    parser.add_argument(
        "--amplitude",
        type=amplitude_value,
        default=0.35,
        help="Overall output amplitude (0-1].",
    )
    parser.add_argument(
        "--sample-rate", type=positive_int, default=48000, help="Samples per second."
    )
    parser.add_argument(
        "--channels", type=positive_int, default=2, help="Number of output channels."
    )
    parser.add_argument(
        "--fade-in",
        type=non_negative_float,
        default=0.0,
        help="Apply fade-in of N seconds.",
    )
    parser.add_argument(
        "--fade-out",
        type=non_negative_float,
        default=0.0,
        help="Apply fade-out of N seconds.",
    )
    parser.add_argument(
        "--tag",
        metavar="KEY=VALUE",
        action="append",
        default=[],
        help="Optional metadata tags to embed. Repeatable.",
    )
    parser.add_argument(
        "--log-level",
        choices=LOG_LEVELS,
        default="warning",
        help="ffmpeg log verbosity.",
    )
    parser.add_argument(
        "--force", action="store_true", help="Overwrite output file if it exists."
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Print the ffmpeg command without running it.",
    )
    return parser


def main(argv: Sequence[str] | None = None) -> None:
    """Parse the command line, build the ffmpeg invocation and run it.

    Args:
        argv: Arguments to parse; ``None`` makes argparse read ``sys.argv``.

    Invalid input is reported through ``parser.error``. When ffmpeg exits
    with a non-zero status the process exits with that same status.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    if shutil.which("ffmpeg") is None:
        parser.error("ffmpeg executable not found on PATH.")

    if args.fade_in + args.fade_out > args.duration:
        parser.error("sum of fade-in and fade-out cannot exceed total duration.")

    # Reject partials for every non-sine waveform, noise included, rather
    # than dropping them silently.
    if args.partials and args.waveform != "sine":
        parser.error("--partials are only supported when waveform is 'sine'")

    try:
        metadata = parse_tag_pairs(args.tag)
    except argparse.ArgumentTypeError as exc:
        # Raised outside argparse's own type handling, so report it ourselves.
        parser.error(str(exc))

    try:
        if args.waveform == "noise":
            lavfi = build_noise_filtergraph(
                args.noise_color, args.duration, args.sample_rate, args.amplitude
            )
        else:
            lavfi = build_tone_filtergraph(
                args.waveform,
                args.frequency,
                args.duration,
                args.sample_rate,
                args.amplitude,
                args.partials,
            )
    except ValueError as exc:
        parser.error(str(exc))

    lavfi = apply_fades(lavfi, args.duration, args.fade_in, args.fade_out)

    cmd = build_ffmpeg_command(
        lavfi,
        args.output,
        args.duration,
        args.sample_rate,
        args.channels,
        args.log_level,
        args.force,
        metadata,
    )

    if args.dry_run:
        print("ffmpeg command:")
        print("  " + shlex.join(cmd))
        return

    # Create the destination directory only for a real run, so a dry run
    # leaves the filesystem untouched.
    if not args.output.parent.exists():
        args.output.parent.mkdir(parents=True, exist_ok=True)

    try:
        subprocess.run(cmd, check=True)
    except subprocess.CalledProcessError as exc:
        sys.exit(exc.returncode)


if __name__ == "__main__":
    main()