think/indexer/cli.py at main · solpbc.org/solstone

solpbc.org / solstone
fork atom
personal memory agent
fork atom
solstone / think / indexer / cli.py
at main 224 lines 7.0 kB view raw
wrap content
Jer Miller Rename "topic" to "agent" across the codebase 7w ago
2b9207fd
  1# SPDX-License-Identifier: AGPL-3.0-only
  2# Copyright (c) 2026 sol pbc
  3
  4"""CLI functionality for the indexer."""
  5
  6import argparse
  7from typing import Any
  8
  9from think.utils import get_journal, journal_log, setup_cli
 10
 11from .journal import (
 12    index_file,
 13    reset_journal_index,
 14    scan_journal,
 15    search_counts,
 16    search_journal,
 17)
 18
 19
 20def _format_count_column(
 21    items: list[tuple[str, int]], total: int, top_n: int
 22) -> list[str]:
 23    """Format a column of count items with overflow indicator."""
 24    lines = [f"{name} ({count})" for name, count in items[:top_n]]
 25    if total > top_n:
 26        lines.append(f"... +{total - top_n} more")
 27    return lines
 28
 29
 30def _display_counts(counts: dict[str, Any], top_n: int = 5) -> None:
 31    """Display aggregated counts in a compact table format."""
 32    total = counts["total"]
 33    facets = counts["facets"]  # Counter
 34    agents = counts["agents"]  # Counter
 35    days = counts["days"]  # Counter
 36
 37    print(f"Total: {total:,} chunks\n")
 38
 39    # Build columns
 40    facet_col = _format_count_column(facets.most_common(top_n), len(facets), top_n)
 41    agent_col = _format_count_column(agents.most_common(top_n), len(agents), top_n)
 42    day_col = _format_count_column(
 43        sorted(days.items(), reverse=True)[:top_n], len(days), top_n
 44    )
 45
 46    # Header and rows
 47    print(f"{'Facet':<20} {'Agent':<20} {'Day':<20}")
 48    print("-" * 60)
 49
 50    from itertools import zip_longest
 51
 52    for f, a, d in zip_longest(facet_col, agent_col, day_col, fillvalue=""):
 53        print(f"{f:<20} {a:<20} {d:<20}")
 54
 55    print()
 56
 57
 58def _display_search_results(
 59    results: list[dict[str, Any]], total: int, offset: int
 60) -> None:
 61    """Display search results in a consistent format."""
 62    if total == 0 or not results:
 63        print("No results found")
 64        return
 65
 66    # Show pagination context
 67    start = offset + 1
 68    end = offset + len(results)
 69    print(f"Showing {start}-{end} of {total} results\n")
 70
 71    for idx, r in enumerate(results, start):
 72        meta = r.get("metadata", {})
 73        text = r.get("text", "").replace("\n", " ")
 74        snippet = text[:100] + "..." if len(text) > 100 else text
 75        label = meta.get("agent") or meta.get("time") or ""
 76        facet = meta.get("facet")
 77        facet_str = f" ({facet})" if facet else ""
 78        print(f"{idx}. {meta.get('day')} {label}{facet_str}: {snippet}")
 79
 80
 81def main() -> None:
 82    """Main CLI entry point for the indexer."""
 83    parser = argparse.ArgumentParser(
 84        description="Index journal content (insights, transcripts, events, entities, todos)"
 85    )
 86    parser.add_argument(
 87        "--rescan",
 88        action="store_true",
 89        help="Scan and update index (light mode: today + facets/imports/apps, excludes historical days)",
 90    )
 91    parser.add_argument(
 92        "--rescan-full",
 93        action="store_true",
 94        help="Full rescan including all historical day directories",
 95    )
 96    parser.add_argument(
 97        "--rescan-file",
 98        metavar="PATH",
 99        help="Index a specific file (absolute or journal-relative path)",
100    )
101    parser.add_argument(
102        "--reset",
103        action="store_true",
104        help="Remove the index before rescan",
105    )
106    parser.add_argument(
107        "--day",
108        help="Filter search results by exact YYYYMMDD day",
109    )
110    parser.add_argument(
111        "--day-from",
112        help="Filter search results by date range start (YYYYMMDD, inclusive)",
113    )
114    parser.add_argument(
115        "--day-to",
116        help="Filter search results by date range end (YYYYMMDD, inclusive)",
117    )
118    parser.add_argument(
119        "--facet",
120        help="Filter search results by facet name",
121    )
122    parser.add_argument(
123        "--agent",
124        "-a",
125        help="Filter search results by agent (e.g., 'flow', 'event', 'news')",
126    )
127    parser.add_argument(
128        "--stream",
129        help="Filter search results by stream name (e.g., 'archon', 'import.apple')",
130    )
131    parser.add_argument(
132        "-q",
133        "--query",
134        nargs="?",
135        const="",
136        help="Run query (interactive mode if no query provided)",
137    )
138    parser.add_argument(
139        "--limit",
140        type=int,
141        default=10,
142        help="Maximum number of results to return (default: 10)",
143    )
144    parser.add_argument(
145        "--offset",
146        type=int,
147        default=0,
148        help="Number of results to skip for pagination (default: 0)",
149    )
150    parser.add_argument(
151        "--top",
152        type=int,
153        default=5,
154        help="Number of items to show per count column (default: 5)",
155    )
156
157    args = setup_cli(parser)
158    journal = get_journal()
159
160    if (
161        not args.rescan
162        and not args.rescan_full
163        and not args.rescan_file
164        and not args.reset
165        and args.query is None
166    ):
167        parser.print_help()
168        return
169
170    if args.reset:
171        reset_journal_index(journal)
172
173    if args.rescan_file:
174        # Single file indexing (incompatible with --rescan/--rescan-full)
175        if args.rescan or args.rescan_full:
176            parser.error("--rescan-file cannot be used with --rescan or --rescan-full")
177        try:
178            index_file(journal, args.rescan_file, verbose=args.verbose)
179            journal_log(f"indexer file indexed: {args.rescan_file}")
180        except (ValueError, FileNotFoundError) as e:
181            parser.error(str(e))
182    elif args.rescan or args.rescan_full:
183        changed = scan_journal(journal, verbose=args.verbose, full=args.rescan_full)
184        if changed:
185            journal_log("indexer journal rescan ok")
186
187    if args.query is not None:
188        query_kwargs: dict[str, Any] = {}
189        if args.day:
190            query_kwargs["day"] = args.day
191        if args.day_from:
192            query_kwargs["day_from"] = args.day_from
193        if args.day_to:
194            query_kwargs["day_to"] = args.day_to
195        if args.facet:
196            query_kwargs["facet"] = args.facet
197        if args.agent:
198            query_kwargs["agent"] = args.agent
199        if args.stream:
200            query_kwargs["stream"] = args.stream
201
202        if args.query:
203            # Single query mode - show counts then results
204            counts = search_counts(args.query, **query_kwargs)
205            _display_counts(counts, args.top)
206            total, results = search_journal(
207                args.query, args.limit, args.offset, **query_kwargs
208            )
209            _display_search_results(results, total, args.offset)
210        else:
211            # Interactive mode
212            while True:
213                try:
214                    query = input("search> ").strip()
215                except EOFError:
216                    break
217                if not query:
218                    break
219                counts = search_counts(query, **query_kwargs)
220                _display_counts(counts, args.top)
221                total, results = search_journal(
222                    query, args.limit, args.offset, **query_kwargs
223                )
224                _display_search_results(results, total, args.offset)