# Source: "personal memory agent" repository, main branch (tests for think.models, ~740 lines / 25 kB).
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright (c) 2026 sol pbc

"""Tests for think.models module."""

import json
import time

import pytest

from think.models import (
    CLAUDE_HAIKU_4,
    CLAUDE_OPUS_4,
    CLAUDE_SONNET_4,
    GEMINI_FLASH,
    GEMINI_LITE,
    GEMINI_PRO,
    GPT_5,
    GPT_5_MINI,
    GPT_5_NANO,
    PROMPT_PATHS,
    PROVIDER_DEFAULTS,
    TIER_FLASH,
    TIER_LITE,
    TIER_PRO,
    TYPE_DEFAULTS,
    calc_token_cost,
    get_context_registry,
    get_usage_cost,
    iter_token_log,
    log_token_usage,
    resolve_provider,
)


def test_calc_token_cost_basic():
    """Test basic cost calculation with a known model."""
    token_data = {
        "model": "gpt-4o",
        "usage": {
            "input_tokens": 1000,
            "output_tokens": 100,
            "total_tokens": 1100,
        },
    }

    result = calc_token_cost(token_data)

    assert result is not None
    assert "total_cost" in result
    assert "input_cost" in result
    assert "output_cost" in result
    assert "currency" in result
    assert result["currency"] == "USD"
    assert result["total_cost"] > 0
    assert result["input_cost"] > 0
    assert result["output_cost"] > 0


def test_calc_token_cost_with_cache():
    """Test cost calculation with cached tokens."""
    token_data = {
        "model": "claude-sonnet-4-20250514",
        "usage": {
            "input_tokens": 1000,
            "output_tokens": 100,
            "cached_tokens": 500,
            "total_tokens": 1600,
        },
    }

    result = calc_token_cost(token_data)

    assert result is not None
    assert result["total_cost"] > 0
    # Cached tokens should reduce the cost compared to all uncached
    assert result["input_cost"] >= 0


def test_calc_token_cost_unknown_model():
    """Test that unknown models return None."""
    token_data = {
        "model": "random-model-xyz",
        "usage": {
            "input_tokens": 1000,
            "output_tokens": 100,
        },
    }

    result = calc_token_cost(token_data)
    assert result is None


def test_calc_token_cost_missing_data():
    """Test that missing data returns None."""
    # Missing model
    assert calc_token_cost({"usage": {"input_tokens": 1000}}) is None

    # Missing usage
    assert calc_token_cost({"model": "gpt-4o"}) is None

    # Empty dict
    assert calc_token_cost({}) is None


def test_calc_token_cost_with_reasoning_tokens():
    """Test cost calculation includes reasoning tokens in output."""
    token_data = {
        "model": "gpt-4o",
        "usage": {
            "input_tokens": 1000,
            "output_tokens": 100,
            "reasoning_tokens": 50,
            "total_tokens": 1150,
        },
    }

    result = calc_token_cost(token_data)

    # Should succeed - reasoning tokens are implicitly part of output pricing
    assert result is not None
    assert result["total_cost"] > 0


# ---------------------------------------------------------------------------
# resolve_provider tests
# ---------------------------------------------------------------------------


@pytest.fixture
def use_fixtures_journal(monkeypatch):
    """Use the fixtures journal for provider config tests."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", "tests/fixtures/journal")


def test_resolve_provider_default_generate(use_fixtures_journal):
    """Test that generate default provider is returned for unknown context."""
    provider, model = resolve_provider("unknown.context", "generate")
    assert provider == "google"
    # Default tier is 2, which is overridden in fixture config to custom model
    assert model == "gemini-custom-flash-test"


def test_resolve_provider_default_cogitate(use_fixtures_journal):
    """Test that cogitate default provider is returned for unknown context."""
    provider, model = resolve_provider("unknown.context", "cogitate")
    assert provider == "openai"
    assert model == GPT_5_MINI


def test_resolve_provider_exact_match(use_fixtures_journal):
    """Test that exact context match works."""
    provider, model = resolve_provider("test.openai", "generate")
    assert provider == "openai"
    assert model == "gpt-5-mini"


def test_resolve_provider_glob_match(use_fixtures_journal):
    """Test that glob pattern matching works."""
    # observe.* pattern should match
    provider, model = resolve_provider("observe.describe.frame", "generate")
    assert provider == "google"
    assert model == "gemini-2.5-flash-lite"

    # Also matches with other suffixes
    provider, model = resolve_provider("observe.enrich", "generate")
    assert provider == "google"
    assert model == "gemini-2.5-flash-lite"


def test_resolve_provider_anthropic(use_fixtures_journal):
    """Test anthropic provider routing."""
    provider, model = resolve_provider("test.anthropic", "generate")
    assert provider == "anthropic"
    assert model == "claude-sonnet-4-5"


def test_resolve_provider_empty_context(use_fixtures_journal):
    """Test that empty context returns default."""
    provider, model = resolve_provider("", "generate")
    assert provider == "google"


def test_resolve_provider_no_config(monkeypatch, tmp_path):
    """Test fallback when no provider config exists."""
    # Use a journal path with no config
    empty_journal = tmp_path / "empty_journal"
    empty_journal.mkdir()
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(empty_journal))

    provider, model = resolve_provider("anything", "generate")
    assert provider == "google"
    assert model == GEMINI_FLASH

    provider, model = resolve_provider("anything", "cogitate")
    assert provider == "openai"
    assert model == GPT_5_MINI


# ---------------------------------------------------------------------------
# Tier system tests
# ---------------------------------------------------------------------------


def test_tier_constants():
    """Test tier constant values."""
    assert TIER_PRO == 1
    assert TIER_FLASH == 2
    assert TIER_LITE == 3


def test_type_defaults():
    """Test TYPE_DEFAULTS structure for generate and cogitate."""
    assert "generate" in TYPE_DEFAULTS
    assert "cogitate" in TYPE_DEFAULTS

    for agent_type in ("generate", "cogitate"):
        defaults = TYPE_DEFAULTS[agent_type]
        assert "provider" in defaults
        assert "tier" in defaults
        assert "backup" in defaults

    assert TYPE_DEFAULTS["generate"]["provider"] == "google"
    assert TYPE_DEFAULTS["cogitate"]["provider"] == "openai"


def test_prompt_paths_exist():
    """Test all PROMPT_PATHS files exist and have valid frontmatter."""
    from pathlib import Path

    # Imported locally: third-party dependency only needed by this test.
    import frontmatter

    base_dir = Path(__file__).parent.parent  # Project root
    required_keys = {"context", "tier", "label", "group"}

    for rel_path in PROMPT_PATHS:
        path = base_dir / rel_path
        assert path.exists(), f"Prompt file not found: {rel_path}"

        post = frontmatter.load(path)
        meta = post.metadata or {}

        assert required_keys <= set(meta.keys()), (
            f"{rel_path} missing keys: {required_keys - set(meta.keys())}"
        )
        assert meta["tier"] in (
            TIER_PRO,
            TIER_FLASH,
            TIER_LITE,
        ), f"{rel_path} has invalid tier: {meta['tier']}"
        assert isinstance(meta["label"], str) and meta["label"], (
            f"{rel_path} has invalid label: {meta['label']}"
        )
        assert isinstance(meta["group"], str) and meta["group"], (
            f"{rel_path} has invalid group: {meta['group']}"
        )


def test_prompt_contexts_in_registry():
    """Test prompt contexts are discovered and in registry."""
    registry = get_context_registry()

    # Verify known prompt contexts exist with correct values
    assert "observe.describe.frame" in registry
    assert registry["observe.describe.frame"]["tier"] == TIER_LITE
    assert registry["observe.describe.frame"]["group"] == "Observe"

    assert "observe.enrich" in registry
    assert registry["observe.enrich"]["tier"] == TIER_FLASH

    assert "detect.created" in registry
    assert registry["detect.created"]["tier"] == TIER_LITE


def test_provider_defaults_structure():
    """Test PROVIDER_DEFAULTS contains all providers and tiers."""
    assert "google" in PROVIDER_DEFAULTS
    assert "openai" in PROVIDER_DEFAULTS
    assert "anthropic" in PROVIDER_DEFAULTS

    for provider in PROVIDER_DEFAULTS:
        assert TIER_PRO in PROVIDER_DEFAULTS[provider]
        assert TIER_FLASH in PROVIDER_DEFAULTS[provider]
        assert TIER_LITE in PROVIDER_DEFAULTS[provider]


def test_provider_defaults_models():
    """Test PROVIDER_DEFAULTS maps to correct model constants."""
    assert PROVIDER_DEFAULTS["google"][TIER_PRO] == GEMINI_PRO
    assert PROVIDER_DEFAULTS["google"][TIER_FLASH] == GEMINI_FLASH
    assert PROVIDER_DEFAULTS["google"][TIER_LITE] == GEMINI_LITE

    assert PROVIDER_DEFAULTS["openai"][TIER_PRO] == GPT_5
    assert PROVIDER_DEFAULTS["openai"][TIER_FLASH] == GPT_5_MINI
    assert PROVIDER_DEFAULTS["openai"][TIER_LITE] == GPT_5_NANO

    assert PROVIDER_DEFAULTS["anthropic"][TIER_PRO] == CLAUDE_OPUS_4
    assert PROVIDER_DEFAULTS["anthropic"][TIER_FLASH] == CLAUDE_SONNET_4
    assert PROVIDER_DEFAULTS["anthropic"][TIER_LITE] == CLAUDE_HAIKU_4


def test_resolve_provider_tier_based(use_fixtures_journal):
    """Test tier-based resolution."""
    # test.tier has tier: 1 (pro)
    provider, model = resolve_provider("test.tier", "generate")
    assert provider == "google"
    assert model == GEMINI_PRO


def test_resolve_provider_tier_inherit_provider(use_fixtures_journal):
    """Test tier with inherited provider from type default."""
    # test.tier.inherit has tier: 3 only, should inherit google from generate default
    provider, model = resolve_provider("test.tier.inherit", "generate")
    assert provider == "google"
    assert model == GEMINI_LITE

    # Same context with cogitate should inherit openai
    provider, model = resolve_provider("test.tier.inherit", "cogitate")
    assert provider == "openai"
    assert model == GPT_5_NANO


def test_resolve_provider_tier_with_provider(use_fixtures_journal):
    """Test tier with explicit provider."""
    # test.tier.override has provider: openai, tier: 2
    provider, model = resolve_provider("test.tier.override", "generate")
    assert provider == "openai"
    assert model == GPT_5_MINI


def test_resolve_provider_tier_glob(use_fixtures_journal):
    """Test tier-based glob pattern matching."""
    # observe.* now uses tier: 3 instead of explicit model
    provider, model = resolve_provider("observe.describe.frame", "generate")
    assert provider == "google"
    assert model == GEMINI_LITE


def test_resolve_provider_model_overrides_tier(use_fixtures_journal):
    """Test that explicit model takes precedence over tier."""
    # test.openai has explicit model, not tier
    provider, model = resolve_provider("test.openai", "generate")
    assert provider == "openai"
    assert model == "gpt-5-mini"


def test_resolve_provider_default_tier(use_fixtures_journal):
    """Test default uses tier-based resolution with config override."""
    # Generate default is tier: 2, which is overridden in config to custom model
    provider, model = resolve_provider("unknown.context", "generate")
    assert provider == "google"
    assert model == "gemini-custom-flash-test"


def test_resolve_provider_config_model_override(use_fixtures_journal):
    """Test that config models section overrides system defaults."""
    # test.config.override uses tier: 2, which is overridden in config
    provider, model = resolve_provider("test.config.override", "generate")
    assert provider == "google"
    # Should use the custom model from config, not system default GEMINI_FLASH
    assert model == "gemini-custom-flash-test"
    assert model != GEMINI_FLASH


def test_resolve_provider_tier_fallback_to_system_default(use_fixtures_journal):
    """Test that tiers not in config fall back to system defaults."""
    # test.tier uses tier: 1 (pro), which is NOT overridden in config
    # Should fall back to system default GEMINI_PRO
    provider, model = resolve_provider("test.tier", "generate")
    assert provider == "google"
    assert model == GEMINI_PRO


def test_resolve_provider_invalid_tier(use_fixtures_journal, monkeypatch, tmp_path):
    """Test that invalid tier values fall back to default tier."""
    # Create a config with an invalid tier
    config_dir = tmp_path / "config"
    config_dir.mkdir()
    config = {
        "providers": {
            "generate": {"provider": "google", "tier": 2},
            "contexts": {
                "test.invalid": {"provider": "google", "tier": 99},
                "test.string": {"provider": "google", "tier": "flash"},
            },
        }
    }
    (config_dir / "journal.json").write_text(json.dumps(config))
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))

    # Invalid tier 99 should fall back to generate default tier (2)
    provider, model = resolve_provider("test.invalid", "generate")
    assert provider == "google"
    assert model == GEMINI_FLASH  # tier 2 system default

    # String tier should also fall back
    provider, model = resolve_provider("test.string", "generate")
    assert provider == "google"
    assert model == GEMINI_FLASH


# ---------------------------------------------------------------------------
# Dynamic context registry tests
# ---------------------------------------------------------------------------


def test_context_registry_includes_prompt_contexts():
    """Test that registry includes all contexts from PROMPT_PATHS."""
    from pathlib import Path

    # Imported locally: third-party dependency only needed by this test.
    import frontmatter

    registry = get_context_registry()
    base_dir = Path(__file__).parent.parent

    # All prompt contexts should be in registry with correct tier
    for rel_path in PROMPT_PATHS:
        path = base_dir / rel_path
        post = frontmatter.load(path)
        meta = post.metadata or {}
        context = meta.get("context")

        assert context in registry, f"Prompt context {context} not in registry"
        assert registry[context]["tier"] == meta["tier"]


def test_context_registry_includes_categories():
    """Test that registry includes discovered category contexts."""
    registry = get_context_registry()

    # Should have category entries (from observe/categories/*.md)
    category_contexts = [k for k in registry if k.startswith("observe.describe.")]

    # Should have frame + all categories (browsing, code, gaming, etc.)
    assert len(category_contexts) > 5, "Should discover category contexts"

    # Each category context should have required fields
    for context in category_contexts:
        assert "tier" in registry[context]
        assert "label" in registry[context]
        assert "group" in registry[context]
        assert registry[context]["tier"] in (TIER_PRO, TIER_FLASH, TIER_LITE)


def test_context_registry_includes_talent_configs():
    """Test that registry includes discovered talent contexts (agents + generators)."""
    registry = get_context_registry()

    # Should have talent entries (from talent/*.md and apps/*/talent/*.md)
    talent_contexts = [k for k in registry if k.startswith("talent.")]

    # Should have multiple talent contexts (agents + generators)
    assert len(talent_contexts) > 1, "Should discover talent contexts"

    # Should have system talent configs
    system_talent = [k for k in talent_contexts if k.startswith("talent.system.")]
    assert len(system_talent) > 0, "Should discover system talent configs"

    # Should have app talent configs
    app_talent = [
        k
        for k in talent_contexts
        if k.startswith("talent.") and not k.startswith("talent.system.")
    ]
    assert len(app_talent) > 0, "Should discover app talent configs"

    # Should include type field for talent contexts
    for context in talent_contexts:
        assert "type" in registry[context], f"{context} missing type field"


def test_context_registry_structure():
    """Test that all registry entries have required fields."""
    registry = get_context_registry()
    required_keys = {"tier", "label", "group"}

    for context, config in registry.items():
        assert isinstance(config, dict), f"{context} should be a dict"
        assert required_keys <= set(config.keys()), (
            f"{context} missing keys: {required_keys - set(config.keys())}"
        )
        assert config["tier"] in (
            TIER_PRO,
            TIER_FLASH,
            TIER_LITE,
        ), f"{context} has invalid tier: {config['tier']}"


def test_context_registry_is_cached():
    """Test that registry is built once and cached."""
    registry1 = get_context_registry()
    registry2 = get_context_registry()

    # Should return the same object (cached)
    assert registry1 is registry2


# ---------------------------------------------------------------------------
# Model pricing support tests
# ---------------------------------------------------------------------------


def test_all_default_models_have_pricing():
    """Verify all models in PROVIDER_DEFAULTS have genai-prices support.

    This test ensures that when default models are updated, we catch any
    missing pricing data early. If this test fails:

    1. Run: make update-prices
    2. Re-run this test
    3. If still failing, the model may be too new for genai-prices

    See think/models.py model constants section for more details.
    """
    # Collect all unique models from PROVIDER_DEFAULTS
    all_models = set()
    for provider_models in PROVIDER_DEFAULTS.values():
        all_models.update(provider_models.values())

    # Also include the named constants directly (in case they differ)
    all_models.update(
        [
            GEMINI_PRO,
            GEMINI_FLASH,
            GEMINI_LITE,
            GPT_5,
            GPT_5_MINI,
            GPT_5_NANO,
            CLAUDE_OPUS_4,
            CLAUDE_SONNET_4,
            CLAUDE_HAIKU_4,
        ]
    )

    missing_pricing = []
    for model in sorted(all_models):
        token_data = {
            "model": model,
            "usage": {
                "input_tokens": 1000,
                "output_tokens": 100,
                "total_tokens": 1100,
            },
        }
        result = calc_token_cost(token_data)
        if result is None:
            missing_pricing.append(model)

    if missing_pricing:
        pytest.fail(
            f"Models missing genai-prices support: {missing_pricing}\n"
            "Run 'make update-prices' and re-test. "
            "If still failing, model may be too new for genai-prices."
        )


# ---------------------------------------------------------------------------
# get_usage_cost tests
# ---------------------------------------------------------------------------


def test_get_usage_cost_nonexistent_day(use_fixtures_journal):
    """Test that nonexistent day returns zeros."""
    result = get_usage_cost("19000101")
    assert result == {"requests": 0, "tokens": 0, "cost": 0.0}


def test_get_usage_cost_day_total(use_fixtures_journal):
    """Test aggregating all entries for a day."""
    # 20250823 has test entries with gemini models
    result = get_usage_cost("20250823")
    assert result["requests"] > 0
    assert isinstance(result["tokens"], int)
    assert isinstance(result["cost"], float)


def test_iter_token_log_preserves_type_field(use_fixtures_journal):
    """Token log iterator should preserve top-level type field."""
    entries = list(iter_token_log("20250823"))
    generate_entries = [entry for entry in entries if entry.get("type") == "generate"]

    assert generate_entries
    assert any(
        entry.get("context") == "think.detect_created.classify_new_file"
        for entry in generate_entries
    )


def test_get_usage_cost_context_filter(use_fixtures_journal):
    """Test filtering by context prefix."""
    # Filter to test contexts
    result = get_usage_cost("20250823", context="tests.test_gemini")
    assert result["requests"] > 0

    # Filter to non-matching context should return zeros
    result_empty = get_usage_cost("20250823", context="nonexistent.context")
    assert result_empty["requests"] == 0


def test_get_usage_cost_segment_filter(use_fixtures_journal):
    """Test filtering by segment key."""
    # Fixture data includes one entry tagged with segment 143022_300
    result = get_usage_cost("20250823", segment="143022_300")
    assert result["requests"] == 1
    assert result["tokens"] == 7000
    assert result["cost"] > 0.0


def test_get_usage_cost_combined_filters(use_fixtures_journal):
    """Test combined segment and context filters."""
    # With both filters, entries must match both
    result = get_usage_cost(
        "20250823",
        segment="nonexistent",
        context="tests.test_gemini",
    )
    # Segment doesn't exist, so no matches
    assert result["requests"] == 0


# ---------------------------------------------------------------------------
# log_token_usage normalization tests
# ---------------------------------------------------------------------------


def _read_token_log_entry(journal_path):
    """Read back the single entry from today's token log under *journal_path*."""
    log_file = journal_path / "tokens" / (time.strftime("%Y%m%d") + ".jsonl")
    return json.loads(log_file.read_text().strip())


def test_log_token_usage_computes_total_tokens(tmp_path, monkeypatch):
    """total_tokens is computed from input+output when missing (Codex CLI format)."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))

    # Codex CLI format: no total_tokens
    log_token_usage(
        model="gpt-5.2",
        usage={"input_tokens": 1000, "output_tokens": 200},
        context="test",
    )

    entry = _read_token_log_entry(tmp_path)
    assert entry["usage"]["total_tokens"] == 1200
    assert entry["usage"]["input_tokens"] == 1000
    assert entry["usage"]["output_tokens"] == 200


def test_log_token_usage_preserves_existing_total_tokens(tmp_path, monkeypatch):
    """total_tokens is preserved when already present and non-zero."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))

    log_token_usage(
        model="gpt-5.2",
        usage={"input_tokens": 1000, "output_tokens": 200, "total_tokens": 1500},
        context="test",
    )

    entry = _read_token_log_entry(tmp_path)
    assert entry["usage"]["total_tokens"] == 1500


def test_log_token_usage_maps_cached_input_tokens(tmp_path, monkeypatch):
    """cached_input_tokens (Codex CLI format) maps to cached_tokens."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))

    log_token_usage(
        model="gpt-5.2",
        usage={
            "input_tokens": 1000,
            "cached_input_tokens": 800,
            "output_tokens": 200,
        },
        context="test",
    )

    entry = _read_token_log_entry(tmp_path)
    assert entry["usage"]["cached_tokens"] == 800
    assert entry["usage"]["total_tokens"] == 1200


def test_log_token_usage_passes_through_reasoning_tokens(tmp_path, monkeypatch):
    """reasoning_tokens from provider-normalized usage are preserved in log."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))

    # Normalized usage from Google provider (the bug: reasoning_tokens were dropped)
    log_token_usage(
        model="gemini-3-flash-preview",
        usage={
            "input_tokens": 13319,
            "output_tokens": 969,
            "total_tokens": 37878,
            "reasoning_tokens": 23590,
        },
        context="test",
    )

    entry = _read_token_log_entry(tmp_path)
    assert entry["usage"]["reasoning_tokens"] == 23590
    assert entry["usage"]["total_tokens"] == 37878
    assert entry["usage"]["input_tokens"] == 13319
    assert entry["usage"]["output_tokens"] == 969


def test_log_token_usage_passes_through_cache_creation_tokens(tmp_path, monkeypatch):
    """cache_creation_tokens from Anthropic provider are preserved in log."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))

    log_token_usage(
        model="claude-sonnet-4-5",
        usage={
            "input_tokens": 5000,
            "output_tokens": 1000,
            "total_tokens": 6000,
            "cached_tokens": 3000,
            "cache_creation_tokens": 2000,
        },
        context="test",
    )

    entry = _read_token_log_entry(tmp_path)
    assert entry["usage"]["cache_creation_tokens"] == 2000
    assert entry["usage"]["cached_tokens"] == 3000