# Source: "personal memory agent" repository, main branch (tests for think.models, ~740 lines / 25 kB).
# SPDX-License-Identifier: AGPL-3.0-only
# Copyright (c) 2026 sol pbc

"""Tests for think.models module."""

import json
import time

import pytest

from think.models import (
    CLAUDE_HAIKU_4,
    CLAUDE_OPUS_4,
    CLAUDE_SONNET_4,
    GEMINI_FLASH,
    GEMINI_LITE,
    GEMINI_PRO,
    GPT_5,
    GPT_5_MINI,
    GPT_5_NANO,
    PROMPT_PATHS,
    PROVIDER_DEFAULTS,
    TIER_FLASH,
    TIER_LITE,
    TIER_PRO,
    TYPE_DEFAULTS,
    calc_token_cost,
    get_context_registry,
    get_usage_cost,
    iter_token_log,
    log_token_usage,
    resolve_provider,
)


def test_calc_token_cost_basic():
    """Test basic cost calculation with a known model."""
    token_data = {
        "model": "gpt-4o",
        "usage": {
            "input_tokens": 1000,
            "output_tokens": 100,
            "total_tokens": 1100,
        },
    }

    result = calc_token_cost(token_data)

    assert result is not None
    assert "total_cost" in result
    assert "input_cost" in result
    assert "output_cost" in result
    assert "currency" in result
    assert result["currency"] == "USD"
    assert result["total_cost"] > 0
    assert result["input_cost"] > 0
    assert result["output_cost"] > 0


def test_calc_token_cost_with_cache():
    """Test cost calculation with cached tokens."""
    token_data = {
        "model": "claude-sonnet-4-20250514",
        "usage": {
            "input_tokens": 1000,
            "output_tokens": 100,
            "cached_tokens": 500,
            "total_tokens": 1600,
        },
    }

    result = calc_token_cost(token_data)

    assert result is not None
    assert result["total_cost"] > 0
    # Cached tokens should reduce the cost compared to all uncached
    assert result["input_cost"] >= 0


def test_calc_token_cost_unknown_model():
    """Test that unknown models return None."""
    token_data = {
        "model": "random-model-xyz",
        "usage": {
            "input_tokens": 1000,
            "output_tokens": 100,
        },
    }

    result = calc_token_cost(token_data)
    assert result is None


def test_calc_token_cost_missing_data():
    """Test that missing data returns None."""
    # Missing model
    assert calc_token_cost({"usage": {"input_tokens": 1000}}) is None

    # Missing usage
    assert calc_token_cost({"model": "gpt-4o"}) is None

    # Empty dict
    assert calc_token_cost({}) is None


def test_calc_token_cost_with_reasoning_tokens():
    """Test cost calculation includes reasoning tokens in output."""
    token_data = {
        "model": "gpt-4o",
        "usage": {
            "input_tokens": 1000,
            "output_tokens": 100,
            "reasoning_tokens": 50,
            "total_tokens": 1150,
        },
    }

    result = calc_token_cost(token_data)

    # Should succeed - reasoning tokens are implicitly part of output pricing
    assert result is not None
    assert result["total_cost"] > 0


# ---------------------------------------------------------------------------
# resolve_provider tests
# ---------------------------------------------------------------------------


@pytest.fixture
def use_fixtures_journal(monkeypatch):
    """Use the fixtures journal for provider config tests."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", "tests/fixtures/journal")


def test_resolve_provider_default_generate(use_fixtures_journal):
    """Test that generate default provider is returned for unknown context."""
    provider, model = resolve_provider("unknown.context", "generate")
    assert provider == "google"
    # Default tier is 2, which is overridden in fixture config to custom model
    assert model == "gemini-custom-flash-test"


def test_resolve_provider_default_cogitate(use_fixtures_journal):
    """Test that cogitate default provider is returned for unknown context."""
    provider, model = resolve_provider("unknown.context", "cogitate")
    assert provider == "openai"
    assert model == GPT_5_MINI


def test_resolve_provider_exact_match(use_fixtures_journal):
    """Test that exact context match works."""
    provider, model = resolve_provider("test.openai", "generate")
    assert provider == "openai"
    assert model == "gpt-5-mini"


def test_resolve_provider_glob_match(use_fixtures_journal):
    """Test that glob pattern matching works."""
    # observe.* pattern should match
    provider, model = resolve_provider("observe.describe.frame", "generate")
    assert provider == "google"
    assert model == "gemini-2.5-flash-lite"

    # Also matches with other suffixes
    provider, model = resolve_provider("observe.enrich", "generate")
    assert provider == "google"
    assert model == "gemini-2.5-flash-lite"


def test_resolve_provider_anthropic(use_fixtures_journal):
    """Test anthropic provider routing."""
    provider, model = resolve_provider("test.anthropic", "generate")
    assert provider == "anthropic"
    assert model == "claude-sonnet-4-5"


def test_resolve_provider_empty_context(use_fixtures_journal):
    """Test that empty context returns default."""
    provider, model = resolve_provider("", "generate")
    assert provider == "google"


def test_resolve_provider_no_config(monkeypatch, tmp_path):
    """Test fallback when no provider config exists."""
    # Use a journal path with no config
    empty_journal = tmp_path / "empty_journal"
    empty_journal.mkdir()
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(empty_journal))

    provider, model = resolve_provider("anything", "generate")
    assert provider == "google"
    assert model == GEMINI_FLASH

    provider, model = resolve_provider("anything", "cogitate")
    assert provider == "openai"
    assert model == GPT_5_MINI


# ---------------------------------------------------------------------------
# Tier system tests
# ---------------------------------------------------------------------------


def test_tier_constants():
    """Test tier constant values."""
    assert TIER_PRO == 1
    assert TIER_FLASH == 2
    assert TIER_LITE == 3


def test_type_defaults():
    """Test TYPE_DEFAULTS structure for generate and cogitate."""
    assert "generate" in TYPE_DEFAULTS
    assert "cogitate" in TYPE_DEFAULTS

    for agent_type in ("generate", "cogitate"):
        defaults = TYPE_DEFAULTS[agent_type]
        assert "provider" in defaults
        assert "tier" in defaults
        assert "backup" in defaults

    assert TYPE_DEFAULTS["generate"]["provider"] == "google"
    assert TYPE_DEFAULTS["cogitate"]["provider"] == "openai"


def test_prompt_paths_exist():
    """Test all PROMPT_PATHS files exist and have valid frontmatter."""
    from pathlib import Path

    # Imported locally: third-party dependency only needed by this test.
    import frontmatter

    base_dir = Path(__file__).parent.parent  # Project root
    required_keys = {"context", "tier", "label", "group"}

    for rel_path in PROMPT_PATHS:
        path = base_dir / rel_path
        assert path.exists(), f"Prompt file not found: {rel_path}"

        post = frontmatter.load(path)
        meta = post.metadata or {}

        assert required_keys <= set(meta.keys()), (
            f"{rel_path} missing keys: {required_keys - set(meta.keys())}"
        )
        assert meta["tier"] in (
            TIER_PRO,
            TIER_FLASH,
            TIER_LITE,
        ), f"{rel_path} has invalid tier: {meta['tier']}"
        assert isinstance(meta["label"], str) and meta["label"], (
            f"{rel_path} has invalid label: {meta['label']}"
        )
        assert isinstance(meta["group"], str) and meta["group"], (
            f"{rel_path} has invalid group: {meta['group']}"
        )


def test_prompt_contexts_in_registry():
    """Test prompt contexts are discovered and in registry."""
    registry = get_context_registry()

    # Verify known prompt contexts exist with correct values
    assert "observe.describe.frame" in registry
    assert registry["observe.describe.frame"]["tier"] == TIER_LITE
    assert registry["observe.describe.frame"]["group"] == "Observe"

    assert "observe.enrich" in registry
    assert registry["observe.enrich"]["tier"] == TIER_FLASH

    assert "detect.created" in registry
    assert registry["detect.created"]["tier"] == TIER_LITE


def test_provider_defaults_structure():
    """Test PROVIDER_DEFAULTS contains all providers and tiers."""
    assert "google" in PROVIDER_DEFAULTS
    assert "openai" in PROVIDER_DEFAULTS
    assert "anthropic" in PROVIDER_DEFAULTS

    for provider in PROVIDER_DEFAULTS:
        assert TIER_PRO in PROVIDER_DEFAULTS[provider]
        assert TIER_FLASH in PROVIDER_DEFAULTS[provider]
        assert TIER_LITE in PROVIDER_DEFAULTS[provider]


def test_provider_defaults_models():
    """Test PROVIDER_DEFAULTS maps to correct model constants."""
    assert PROVIDER_DEFAULTS["google"][TIER_PRO] == GEMINI_PRO
    assert PROVIDER_DEFAULTS["google"][TIER_FLASH] == GEMINI_FLASH
    assert PROVIDER_DEFAULTS["google"][TIER_LITE] == GEMINI_LITE

    assert PROVIDER_DEFAULTS["openai"][TIER_PRO] == GPT_5
    assert PROVIDER_DEFAULTS["openai"][TIER_FLASH] == GPT_5_MINI
    assert PROVIDER_DEFAULTS["openai"][TIER_LITE] == GPT_5_NANO

    assert PROVIDER_DEFAULTS["anthropic"][TIER_PRO] == CLAUDE_OPUS_4
    assert PROVIDER_DEFAULTS["anthropic"][TIER_FLASH] == CLAUDE_SONNET_4
    assert PROVIDER_DEFAULTS["anthropic"][TIER_LITE] == CLAUDE_HAIKU_4


def test_resolve_provider_tier_based(use_fixtures_journal):
    """Test tier-based resolution."""
    # test.tier has tier: 1 (pro)
    provider, model = resolve_provider("test.tier", "generate")
    assert provider == "google"
    assert model == GEMINI_PRO


def test_resolve_provider_tier_inherit_provider(use_fixtures_journal):
    """Test tier with inherited provider from type default."""
    # test.tier.inherit has tier: 3 only, should inherit google from generate default
    provider, model = resolve_provider("test.tier.inherit", "generate")
    assert provider == "google"
    assert model == GEMINI_LITE

    # Same context with cogitate should inherit openai
    provider, model = resolve_provider("test.tier.inherit", "cogitate")
    assert provider == "openai"
    assert model == GPT_5_NANO


def test_resolve_provider_tier_with_provider(use_fixtures_journal):
    """Test tier with explicit provider."""
    # test.tier.override has provider: openai, tier: 2
    provider, model = resolve_provider("test.tier.override", "generate")
    assert provider == "openai"
    assert model == GPT_5_MINI


def test_resolve_provider_tier_glob(use_fixtures_journal):
    """Test tier-based glob pattern matching."""
    # observe.* now uses tier: 3 instead of explicit model
    provider, model = resolve_provider("observe.describe.frame", "generate")
    assert provider == "google"
    assert model == GEMINI_LITE


def test_resolve_provider_model_overrides_tier(use_fixtures_journal):
    """Test that explicit model takes precedence over tier."""
    # test.openai has explicit model, not tier
    provider, model = resolve_provider("test.openai", "generate")
    assert provider == "openai"
    assert model == "gpt-5-mini"


def test_resolve_provider_default_tier(use_fixtures_journal):
    """Test default uses tier-based resolution with config override."""
    # Generate default is tier: 2, which is overridden in config to custom model
    provider, model = resolve_provider("unknown.context", "generate")
    assert provider == "google"
    assert model == "gemini-custom-flash-test"


def test_resolve_provider_config_model_override(use_fixtures_journal):
    """Test that config models section overrides system defaults."""
    # test.config.override uses tier: 2, which is overridden in config
    provider, model = resolve_provider("test.config.override", "generate")
    assert provider == "google"
    # Should use the custom model from config, not system default GEMINI_FLASH
    assert model == "gemini-custom-flash-test"
    assert model != GEMINI_FLASH


def test_resolve_provider_tier_fallback_to_system_default(use_fixtures_journal):
    """Test that tiers not in config fall back to system defaults."""
    # test.tier uses tier: 1 (pro), which is NOT overridden in config
    # Should fall back to system default GEMINI_PRO
    provider, model = resolve_provider("test.tier", "generate")
    assert provider == "google"
    assert model == GEMINI_PRO


def test_resolve_provider_invalid_tier(use_fixtures_journal, monkeypatch, tmp_path):
    """Test that invalid tier values fall back to default tier."""
    # Create a config with an invalid tier
    config_dir = tmp_path / "config"
    config_dir.mkdir()
    config = {
        "providers": {
            "generate": {"provider": "google", "tier": 2},
            "contexts": {
                "test.invalid": {"provider": "google", "tier": 99},
                "test.string": {"provider": "google", "tier": "flash"},
            },
        }
    }
    (config_dir / "journal.json").write_text(json.dumps(config))
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))

    # Invalid tier 99 should fall back to generate default tier (2)
    provider, model = resolve_provider("test.invalid", "generate")
    assert provider == "google"
    assert model == GEMINI_FLASH  # tier 2 system default

    # String tier should also fall back
    provider, model = resolve_provider("test.string", "generate")
    assert provider == "google"
    assert model == GEMINI_FLASH


# ---------------------------------------------------------------------------
# Dynamic context registry tests
# ---------------------------------------------------------------------------


def test_context_registry_includes_prompt_contexts():
    """Test that registry includes all contexts from PROMPT_PATHS."""
    from pathlib import Path

    # Imported locally: third-party dependency only needed by this test.
    import frontmatter

    registry = get_context_registry()
    base_dir = Path(__file__).parent.parent

    # All prompt contexts should be in registry with correct tier
    for rel_path in PROMPT_PATHS:
        path = base_dir / rel_path
        post = frontmatter.load(path)
        meta = post.metadata or {}
        context = meta.get("context")

        assert context in registry, f"Prompt context {context} not in registry"
        assert registry[context]["tier"] == meta["tier"]


def test_context_registry_includes_categories():
    """Test that registry includes discovered category contexts."""
    registry = get_context_registry()

    # Should have category entries (from observe/categories/*.md)
    category_contexts = [k for k in registry if k.startswith("observe.describe.")]

    # Should have frame + all categories (browsing, code, gaming, etc.)
    assert len(category_contexts) > 5, "Should discover category contexts"

    # Each category context should have required fields
    for context in category_contexts:
        assert "tier" in registry[context]
        assert "label" in registry[context]
        assert "group" in registry[context]
        assert registry[context]["tier"] in (TIER_PRO, TIER_FLASH, TIER_LITE)


def test_context_registry_includes_talent_configs():
    """Test that registry includes discovered talent contexts (agents + generators)."""
    registry = get_context_registry()

    # Should have talent entries (from talent/*.md and apps/*/talent/*.md)
    talent_contexts = [k for k in registry if k.startswith("talent.")]

    # Should have multiple talent contexts (agents + generators)
    assert len(talent_contexts) > 1, "Should discover talent contexts"

    # Should have system talent configs
    system_talent = [k for k in talent_contexts if k.startswith("talent.system.")]
    assert len(system_talent) > 0, "Should discover system talent configs"

    # Should have app talent configs
    app_talent = [
        k
        for k in talent_contexts
        if k.startswith("talent.") and not k.startswith("talent.system.")
    ]
    assert len(app_talent) > 0, "Should discover app talent configs"

    # Should include type field for talent contexts
    for context in talent_contexts:
        assert "type" in registry[context], f"{context} missing type field"


def test_context_registry_structure():
    """Test that all registry entries have required fields."""
    registry = get_context_registry()
    required_keys = {"tier", "label", "group"}

    for context, config in registry.items():
        assert isinstance(config, dict), f"{context} should be a dict"
        assert required_keys <= set(config.keys()), (
            f"{context} missing keys: {required_keys - set(config.keys())}"
        )
        assert config["tier"] in (
            TIER_PRO,
            TIER_FLASH,
            TIER_LITE,
        ), f"{context} has invalid tier: {config['tier']}"


def test_context_registry_is_cached():
    """Test that registry is built once and cached."""
    registry1 = get_context_registry()
    registry2 = get_context_registry()

    # Should return the same object (cached)
    assert registry1 is registry2


# ---------------------------------------------------------------------------
# Model pricing support tests
# ---------------------------------------------------------------------------


def test_all_default_models_have_pricing():
    """Verify all models in PROVIDER_DEFAULTS have genai-prices support.

    This test ensures that when default models are updated, we catch any
    missing pricing data early. If this test fails:

    1. Run: make update-prices
    2. Re-run this test
    3. If still failing, the model may be too new for genai-prices

    See think/models.py model constants section for more details.
    """
    # Collect all unique models from PROVIDER_DEFAULTS
    all_models = set()
    for provider_models in PROVIDER_DEFAULTS.values():
        all_models.update(provider_models.values())

    # Also include the named constants directly (in case they differ)
    all_models.update(
        [
            GEMINI_PRO,
            GEMINI_FLASH,
            GEMINI_LITE,
            GPT_5,
            GPT_5_MINI,
            GPT_5_NANO,
            CLAUDE_OPUS_4,
            CLAUDE_SONNET_4,
            CLAUDE_HAIKU_4,
        ]
    )

    missing_pricing = []
    for model in sorted(all_models):
        token_data = {
            "model": model,
            "usage": {
                "input_tokens": 1000,
                "output_tokens": 100,
                "total_tokens": 1100,
            },
        }
        result = calc_token_cost(token_data)
        if result is None:
            missing_pricing.append(model)

    if missing_pricing:
        pytest.fail(
            f"Models missing genai-prices support: {missing_pricing}\n"
            "Run 'make update-prices' and re-test. "
            "If still failing, model may be too new for genai-prices."
        )


# ---------------------------------------------------------------------------
# get_usage_cost tests
# ---------------------------------------------------------------------------


def test_get_usage_cost_nonexistent_day(use_fixtures_journal):
    """Test that nonexistent day returns zeros."""
    result = get_usage_cost("19000101")
    assert result == {"requests": 0, "tokens": 0, "cost": 0.0}


def test_get_usage_cost_day_total(use_fixtures_journal):
    """Test aggregating all entries for a day."""
    # 20250823 has test entries with gemini models
    result = get_usage_cost("20250823")
    assert result["requests"] > 0
    assert isinstance(result["tokens"], int)
    assert isinstance(result["cost"], float)


def test_iter_token_log_preserves_type_field(use_fixtures_journal):
    """Token log iterator should preserve top-level type field."""
    entries = list(iter_token_log("20250823"))
    generate_entries = [entry for entry in entries if entry.get("type") == "generate"]

    assert generate_entries
    assert any(
        entry.get("context") == "think.detect_created.classify_new_file"
        for entry in generate_entries
    )


def test_get_usage_cost_context_filter(use_fixtures_journal):
    """Test filtering by context prefix."""
    # Filter to test contexts
    result = get_usage_cost("20250823", context="tests.test_gemini")
    assert result["requests"] > 0

    # Filter to non-matching context should return zeros
    result_empty = get_usage_cost("20250823", context="nonexistent.context")
    assert result_empty["requests"] == 0


def test_get_usage_cost_segment_filter(use_fixtures_journal):
    """Test filtering by segment key."""
    # Fixture data includes one entry tagged with segment 143022_300
    result = get_usage_cost("20250823", segment="143022_300")
    assert result["requests"] == 1
    assert result["tokens"] == 7000
    assert result["cost"] > 0.0


def test_get_usage_cost_combined_filters(use_fixtures_journal):
    """Test combined segment and context filters."""
    # With both filters, entries must match both
    result = get_usage_cost(
        "20250823",
        segment="nonexistent",
        context="tests.test_gemini",
    )
    # Segment doesn't exist, so no matches
    assert result["requests"] == 0


# ---------------------------------------------------------------------------
# log_token_usage normalization tests
# ---------------------------------------------------------------------------


def _read_token_log_entry(journal_path):
    """Read back the single entry from today's token log under *journal_path*."""
    log_file = journal_path / "tokens" / (time.strftime("%Y%m%d") + ".jsonl")
    return json.loads(log_file.read_text().strip())


def test_log_token_usage_computes_total_tokens(tmp_path, monkeypatch):
    """total_tokens is computed from input+output when missing (Codex CLI format)."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))

    # Codex CLI format: no total_tokens
    log_token_usage(
        model="gpt-5.2",
        usage={"input_tokens": 1000, "output_tokens": 200},
        context="test",
    )

    entry = _read_token_log_entry(tmp_path)
    assert entry["usage"]["total_tokens"] == 1200
    assert entry["usage"]["input_tokens"] == 1000
    assert entry["usage"]["output_tokens"] == 200


def test_log_token_usage_preserves_existing_total_tokens(tmp_path, monkeypatch):
    """total_tokens is preserved when already present and non-zero."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))

    log_token_usage(
        model="gpt-5.2",
        usage={"input_tokens": 1000, "output_tokens": 200, "total_tokens": 1500},
        context="test",
    )

    entry = _read_token_log_entry(tmp_path)
    assert entry["usage"]["total_tokens"] == 1500


def test_log_token_usage_maps_cached_input_tokens(tmp_path, monkeypatch):
    """cached_input_tokens (Codex CLI format) maps to cached_tokens."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))

    log_token_usage(
        model="gpt-5.2",
        usage={
            "input_tokens": 1000,
            "cached_input_tokens": 800,
            "output_tokens": 200,
        },
        context="test",
    )

    entry = _read_token_log_entry(tmp_path)
    assert entry["usage"]["cached_tokens"] == 800
    assert entry["usage"]["total_tokens"] == 1200


def test_log_token_usage_passes_through_reasoning_tokens(tmp_path, monkeypatch):
    """reasoning_tokens from provider-normalized usage are preserved in log."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))

    # Normalized usage from Google provider (the bug: reasoning_tokens were dropped)
    log_token_usage(
        model="gemini-3-flash-preview",
        usage={
            "input_tokens": 13319,
            "output_tokens": 969,
            "total_tokens": 37878,
            "reasoning_tokens": 23590,
        },
        context="test",
    )

    entry = _read_token_log_entry(tmp_path)
    assert entry["usage"]["reasoning_tokens"] == 23590
    assert entry["usage"]["total_tokens"] == 37878
    assert entry["usage"]["input_tokens"] == 13319
    assert entry["usage"]["output_tokens"] == 969


def test_log_token_usage_passes_through_cache_creation_tokens(tmp_path, monkeypatch):
    """cache_creation_tokens from Anthropic provider are preserved in log."""
    monkeypatch.setenv("_SOLSTONE_JOURNAL_OVERRIDE", str(tmp_path))

    log_token_usage(
        model="claude-sonnet-4-5",
        usage={
            "input_tokens": 5000,
            "output_tokens": 1000,
            "total_tokens": 6000,
            "cached_tokens": 3000,
            "cache_creation_tokens": 2000,
        },
        context="test",
    )

    entry = _read_token_log_entry(tmp_path)
    assert entry["usage"]["cache_creation_tokens"] == 2000
    assert entry["usage"]["cached_tokens"] == 3000