speakers: identify and merge-names CLI commands

personal memory agent

Thin CLI wrappers around discovery.identify_cluster and bootstrap.merge_names,
intended for agent consumption via `sol call speakers`. Both commands always
print JSON (there is no --json flag): on success the result goes to stdout; on
failure the error JSON goes to stderr and the command exits with code 1.

Jer Miller 4 weeks ago 6bf73142 3c5ddc5c

+113

3 changed files

expand all

apps

speakers

call.py

tests

test_discovery.py

test_merge_names.py

+41

apps/speakers/call.py

··· 10 10 sol call speakers attribute-segment <day> <stream> <segment> [--json] 11 11 sol call speakers backfill [--dry-run] [--json] 12 12 sol call speakers discover [--json] 13 + sol call speakers identify <cluster-id> <name> [--entity-id ID] 14 + sol call speakers merge-names <alias> <canonical> 13 15 """ 14 16 15 17 from __future__ import annotations ··· 341 343 f"sid={sample['sentence_id']}: {text_preview}" 342 344 ) 343 345 typer.echo() 346 + 347 + 348 + @app.command() 349 + def identify( 350 + cluster_id: int = typer.Argument(..., help="Cluster ID from discovery output."), 351 + name: str = typer.Argument(..., help="Speaker name to assign."), 352 + entity_id: str | None = typer.Option( 353 + None, "--entity-id", help="Link to existing entity ID instead of name matching." 354 + ), 355 + ) -> None: 356 + """Identify a discovered unknown speaker cluster.""" 357 + import json 358 + 359 + from apps.speakers.discovery import identify_cluster 360 + 361 + result = identify_cluster(cluster_id, name, entity_id=entity_id) 362 + output = json.dumps(result, indent=2, default=str) 363 + if "error" in result: 364 + typer.echo(output, err=True) 365 + raise typer.Exit(1) 366 + typer.echo(output) 367 + 368 + 369 + @app.command("merge-names") 370 + def merge_names_cmd( 371 + alias: str = typer.Argument(..., help="Alias/variant speaker name to merge from."), 372 + canonical: str = typer.Argument(..., help="Canonical speaker name to merge into."), 373 + ) -> None: 374 + """Merge a speaker name variant into a canonical entity.""" 375 + import json 376 + 377 + from apps.speakers.bootstrap import merge_names 378 + 379 + result = merge_names(alias, canonical) 380 + output = json.dumps(result, indent=2, default=str) 381 + if "error" in result: 382 + typer.echo(output, err=True) 383 + raise typer.Exit(1) 384 + typer.echo(output)

+28

apps/speakers/tests/test_discovery.py

··· 9 9 from pathlib import Path 10 10 11 11 import numpy as np 12 + from typer.testing import CliRunner 12 13 14 + from apps.speakers.call import app as speakers_app 13 15 from apps.speakers.discovery import ( 14 16 _discovery_cache_path, 15 17 discover_unknown_speakers, 16 18 identify_cluster, 17 19 ) 20 + 21 + _runner = CliRunner() 18 22 19 23 20 24 def _make_speaker_embeddings( ··· 270 274 271 275 assert result["voiceprints_saved"] == 0 272 276 assert not (env.journal / "entities" / "bob_smith" / "voiceprints.npz").exists() 277 + 278 + 279 + def test_identify_cli_success(speakers_env): 280 + """CLI identify outputs JSON to stdout on success.""" 281 + env = speakers_env() 282 + _setup_owner_centroid(env.journal, [0.0, 1.0]) 283 + embeddings = _make_speaker_embeddings([1.0, 0.0], 5) 284 + _create_cluster_segments(env, embeddings) 285 + 286 + scan_result = discover_unknown_speakers() 287 + cluster_id = scan_result["clusters"][0]["cluster_id"] 288 + 289 + result = _runner.invoke(speakers_app, ["identify", str(cluster_id), "Bob Smith"]) 290 + assert result.exit_code == 0 291 + data = json.loads(result.output) 292 + assert data["status"] == "identified" 293 + assert data["entity_id"] == "bob_smith" 294 + 295 + 296 + def test_identify_cli_error_no_cache(speakers_env): 297 + """CLI identify outputs error JSON to stderr and exits 1 when no cache.""" 298 + speakers_env() 299 + result = _runner.invoke(speakers_app, ["identify", "0", "Nobody"]) 300 + assert result.exit_code == 1

+44

apps/speakers/tests/test_merge_names.py

··· 1 + # SPDX-License-Identifier: AGPL-3.0-only 2 + # Copyright (c) 2026 sol pbc 3 + 4 + """Tests for merge-names CLI command.""" 5 + 6 + from __future__ import annotations 7 + 8 + import json 9 + 10 + import numpy as np 11 + from typer.testing import CliRunner 12 + 13 + from apps.speakers.call import app as speakers_app 14 + 15 + _runner = CliRunner() 16 + 17 + 18 + def test_merge_names_cli_error_missing_entity(speakers_env): 19 + """CLI merge-names outputs error JSON and exits 1 for unknown entity.""" 20 + speakers_env() 21 + result = _runner.invoke(speakers_app, ["merge-names", "Nobody", "Also Nobody"]) 22 + assert result.exit_code == 1 23 + 24 + 25 + def test_merge_names_cli_success(speakers_env): 26 + """CLI merge-names outputs JSON with merged=True on success.""" 27 + env = speakers_env() 28 + entity_a = env.create_entity("Alice Alias") 29 + entity_b = env.create_entity("Alice Canonical") 30 + 31 + emb_a = np.random.default_rng(42).standard_normal((3, 256)).astype(np.float32) 32 + emb_b = np.random.default_rng(99).standard_normal((3, 256)).astype(np.float32) 33 + meta_a = np.array([json.dumps({"key": f"a_{i}"}) for i in range(3)], dtype=str) 34 + meta_b = np.array([json.dumps({"key": f"b_{i}"}) for i in range(3)], dtype=str) 35 + np.savez_compressed(entity_a / "voiceprints.npz", embeddings=emb_a, metadata=meta_a) 36 + np.savez_compressed(entity_b / "voiceprints.npz", embeddings=emb_b, metadata=meta_b) 37 + 38 + result = _runner.invoke( 39 + speakers_app, ["merge-names", "Alice Alias", "Alice Canonical"] 40 + ) 41 + assert result.exit_code == 0 42 + data = json.loads(result.output) 43 + assert data["merged"] is True 44 + assert data["canonical_name"] == "Alice Canonical"