utils/agenticSessionSearch.ts at main

oppi.li / claude-code
fork atom
source dump of claude code
fork atom
claude-code / utils / agenticSessionSearch.ts
at main 307 lines 10 kB view raw
wrap content
oppi.li dump from zip 2d ago
63aada3f
  1import type { LogOption, SerializedMessage } from '../types/logs.js'
  2import { count } from './array.js'
  3import { logForDebugging } from './debug.js'
  4import { getLogDisplayTitle, logError } from './log.js'
  5import { getSmallFastModel } from './model/model.js'
  6import { isLiteLog, loadFullLog } from './sessionStorage.js'
  7import { sideQuery } from './sideQuery.js'
  8import { jsonParse } from './slowOperations.js'
  9
 10// Limits for transcript extraction
 11const MAX_TRANSCRIPT_CHARS = 2000 // Max chars of transcript per session
 12const MAX_MESSAGES_TO_SCAN = 100 // Max messages to scan from start/end
 13const MAX_SESSIONS_TO_SEARCH = 100 // Max sessions to send to the API
 14
 15const SESSION_SEARCH_SYSTEM_PROMPT = `Your goal is to find relevant sessions based on a user's search query.
 16
 17You will be given a list of sessions with their metadata and a search query. Identify which sessions are most relevant to the query.
 18
 19Each session may include:
 20- Title (display name or custom title)
 21- Tag (user-assigned category, shown as [tag: name] - users tag sessions with /tag command to categorize them)
 22- Branch (git branch name, shown as [branch: name])
 23- Summary (AI-generated summary)
 24- First message (beginning of the conversation)
 25- Transcript (excerpt of conversation content)
 26
 27IMPORTANT: Tags are user-assigned labels that indicate the session's topic or category. If the query matches a tag exactly or partially, those sessions should be highly prioritized.
 28
 29For each session, consider (in order of priority):
 301. Exact tag matches (highest priority - user explicitly categorized this session)
 312. Partial tag matches or tag-related terms
 323. Title matches (custom titles or first message content)
 334. Branch name matches
 345. Summary and transcript content matches
 356. Semantic similarity and related concepts
 36
 37CRITICAL: Be VERY inclusive in your matching. Include sessions that:
 38- Contain the query term anywhere in any field
 39- Are semantically related to the query (e.g., "testing" matches sessions about "tests", "unit tests", "QA", etc.)
 40- Discuss topics that could be related to the query
 41- Have transcripts that mention the concept even in passing
 42
 43When in doubt, INCLUDE the session. It's better to return too many results than too few. The user can easily scan through results, but missing relevant sessions is frustrating.
 44
 45Return sessions ordered by relevance (most relevant first). If truly no sessions have ANY connection to the query, return an empty array - but this should be rare.
 46
 47Respond with ONLY the JSON object, no markdown formatting:
 48{"relevant_indices": [2, 5, 0]}`
 49
 50type AgenticSearchResult = {
 51  relevant_indices: number[]
 52}
 53
 54/**
 55 * Extracts searchable text content from a message.
 56 */
 57function extractMessageText(message: SerializedMessage): string {
 58  if (message.type !== 'user' && message.type !== 'assistant') {
 59    return ''
 60  }
 61
 62  const content = 'message' in message ? message.message?.content : undefined
 63  if (!content) return ''
 64
 65  if (typeof content === 'string') {
 66    return content
 67  }
 68
 69  if (Array.isArray(content)) {
 70    return content
 71      .map(block => {
 72        if (typeof block === 'string') return block
 73        if ('text' in block && typeof block.text === 'string') return block.text
 74        return ''
 75      })
 76      .filter(Boolean)
 77      .join(' ')
 78  }
 79
 80  return ''
 81}
 82
 83/**
 84 * Extracts a truncated transcript from session messages.
 85 */
 86function extractTranscript(messages: SerializedMessage[]): string {
 87  if (messages.length === 0) return ''
 88
 89  // Take messages from start and end to get context
 90  const messagesToScan =
 91    messages.length <= MAX_MESSAGES_TO_SCAN
 92      ? messages
 93      : [
 94          ...messages.slice(0, MAX_MESSAGES_TO_SCAN / 2),
 95          ...messages.slice(-MAX_MESSAGES_TO_SCAN / 2),
 96        ]
 97
 98  const text = messagesToScan
 99    .map(extractMessageText)
100    .filter(Boolean)
101    .join(' ')
102    .replace(/\s+/g, ' ')
103    .trim()
104
105  return text.length > MAX_TRANSCRIPT_CHARS
106    ? text.slice(0, MAX_TRANSCRIPT_CHARS) + '…'
107    : text
108}
109
/**
 * Reports whether the lowercased query occurs in any searchable field of a
 * log: display title, custom title, tag, git branch, summary, first prompt
 * and, only if none of those match, the transcript excerpt.
 *
 * @param log - The session log to inspect.
 * @param queryLower - The search query, already lowercased by the caller.
 * @returns `true` as soon as one field contains the query.
 */
function logContainsQuery(log: LogOption, queryLower: string): boolean {
  const mentionsQuery = (field: string | null | undefined): boolean =>
    field?.toLowerCase().includes(queryLower) ?? false

  // The display title always exists, so it is checked unconditionally.
  if (getLogDisplayTitle(log).toLowerCase().includes(queryLower)) return true

  // Optional metadata, in the same order of cheap checks as before.
  const metadataFields = [
    log.customTitle,
    log.tag,
    log.gitBranch,
    log.summary,
    log.firstPrompt,
  ]
  if (metadataFields.some(field => mentionsQuery(field))) return true

  // Building the transcript is the expensive part, so it runs last.
  const sessionMessages = log.messages
  if (!sessionMessages || sessionMessages.length === 0) return false
  return extractTranscript(sessionMessages).toLowerCase().includes(queryLower)
}
141
/** Normalizes an arbitrary thrown value into an `Error` for `logError`. */
function toError(value: unknown): Error {
  return value instanceof Error ? value : new Error(String(value))
}

/** Reports whether a log carries at least one message. */
function hasMessages(log: LogOption): boolean {
  return (log.messages?.length ?? 0) > 0
}

/**
 * Splits logs into those that literally contain the query and those that do
 * not, classifying each log exactly once.
 */
function partitionLogsByQuery(
  logs: LogOption[],
  queryLower: string,
): { matching: LogOption[]; nonMatching: LogOption[] } {
  const matching: LogOption[] = []
  const nonMatching: LogOption[] = []
  for (const log of logs) {
    if (logContainsQuery(log, queryLower)) {
      matching.push(log)
    } else {
      nonMatching.push(log)
    }
  }
  return { matching, nonMatching }
}

/** Renders one session as a single prompt line, prefixed with its index. */
function formatSessionEntry(log: LogOption, index: number): string {
  // Title (display title, may be custom or from first prompt)
  const displayTitle = getLogDisplayTitle(log)
  const parts: string[] = [`${index}:`, displayTitle]

  // Custom title only when it adds information beyond the display title
  if (log.customTitle && log.customTitle !== displayTitle) {
    parts.push(`[custom title: ${log.customTitle}]`)
  }
  if (log.tag) {
    parts.push(`[tag: ${log.tag}]`)
  }
  if (log.gitBranch) {
    parts.push(`[branch: ${log.gitBranch}]`)
  }
  if (log.summary) {
    parts.push(`- Summary: ${log.summary}`)
  }
  // First prompt content (truncated); 'No prompt' is skipped as a placeholder
  if (log.firstPrompt && log.firstPrompt !== 'No prompt') {
    parts.push(`- First message: ${log.firstPrompt.slice(0, 300)}`)
  }
  // Transcript excerpt (if messages are available)
  if (log.messages && log.messages.length > 0) {
    const transcript = extractTranscript(log.messages)
    if (transcript) {
      parts.push(`- Transcript: ${transcript}`)
    }
  }

  return parts.join(' ')
}

/**
 * Pulls the list of session indices out of the model's reply.
 *
 * The reply is untrusted text: only integer indices inside
 * `[0, sessionCount)` are kept (numeric strings are accepted), and repeats
 * are dropped while the model's ordering is preserved. Throws whatever
 * `jsonParse` throws on malformed JSON.
 */
function parseRelevantIndices(
  responseText: string,
  sessionCount: number,
): number[] {
  const jsonMatch = responseText.match(/\{[\s\S]*\}/)
  if (!jsonMatch) {
    logForDebugging('Could not find JSON in agentic search response')
    return []
  }

  // The declared shape is only a hint; every entry is checked at runtime.
  const candidate = jsonParse(jsonMatch[0]) as
    | Partial<AgenticSearchResult>
    | null
    | undefined
  const rawIndices: unknown = candidate?.relevant_indices
  if (!Array.isArray(rawIndices)) {
    logForDebugging(
      'Agentic search response did not contain a relevant_indices array',
    )
    return []
  }

  // A Set keeps first-seen order, so relevance ranking survives de-duplication.
  const accepted = new Set<number>()
  for (const raw of rawIndices as unknown[]) {
    const index =
      typeof raw === 'string' && raw.trim() !== '' ? Number(raw) : raw
    if (
      typeof index === 'number' &&
      Number.isInteger(index) &&
      index >= 0 &&
      index < sessionCount
    ) {
      accepted.add(index)
    }
  }
  return [...accepted]
}

/**
 * Performs an agentic search using Claude to find relevant sessions
 * based on semantic understanding of the query.
 *
 * Sessions that literally contain the query are sent first; remaining slots
 * (up to MAX_SESSIONS_TO_SEARCH) are filled with non-matching sessions in
 * their original order. Lite logs are expanded with `loadFullLog` so their
 * transcripts can be included, falling back to the lite log when loading
 * fails.
 *
 * @param query - The user's search text; a blank query returns `[]`.
 * @param logs - Candidate sessions to search.
 * @param signal - Optional abort signal, forwarded to the side query. If it
 *   is already aborted, no loading or model call is made and `[]` is returned.
 * @returns The sessions the model judged relevant, in the model's order and
 *   without duplicates. Any failure while querying or parsing is logged and
 *   results in `[]`; this function does not reject for those failures.
 */
export async function agenticSessionSearch(
  query: string,
  logs: LogOption[],
  signal?: AbortSignal,
): Promise<LogOption[]> {
  const trimmedQuery = query.trim()
  if (!trimmedQuery || logs.length === 0) {
    return []
  }
  if (signal?.aborted) {
    return []
  }

  // Match on the trimmed query: surrounding whitespace would otherwise
  // prevent any substring hit in the pre-filter.
  const queryLower = trimmedQuery.toLowerCase()

  // Pre-filter: find sessions that contain the query term
  // This ensures we search relevant sessions, not just recent ones
  const { matching: matchingLogs, nonMatching: nonMatchingLogs } =
    partitionLogsByQuery(logs, queryLower)

  // Matching logs first, then non-matching logs for context, capped at the
  // number of sessions we are willing to send to the API
  const logsToSearch = [...matchingLogs, ...nonMatchingLogs].slice(
    0,
    MAX_SESSIONS_TO_SEARCH,
  )

  // Debug: log what data we have
  logForDebugging(
    `Agentic search: ${logsToSearch.length}/${logs.length} logs, query="${query}", ` +
      `matching: ${matchingLogs.length}, with messages: ${count(logsToSearch, hasMessages)}`,
  )

  // Load full logs for lite logs to get transcript content
  const logsWithTranscripts = await Promise.all(
    logsToSearch.map(async log => {
      if (!isLiteLog(log)) return log
      try {
        return await loadFullLog(log)
      } catch (error) {
        logError(toError(error))
        // If loading fails, use the lite log (no transcript)
        return log
      }
    }),
  )

  logForDebugging(
    `Agentic search: loaded ${count(logsWithTranscripts, hasMessages)}/${logsToSearch.length} logs with transcripts`,
  )

  // The caller may have cancelled while the logs were loading
  if (signal?.aborted) {
    return []
  }

  // Build session list for the prompt with all searchable metadata
  const sessionList = logsWithTranscripts.map(formatSessionEntry).join('\n')

  const userMessage = `Sessions:
${sessionList}

Search query: "${query}"

Find the sessions that are most relevant to this query.`

  // Debug: log first part of the session list
  logForDebugging(
    `Agentic search prompt (first 500 chars): ${userMessage.slice(0, 500)}...`,
  )

  try {
    const model = getSmallFastModel()
    logForDebugging(`Agentic search using model: ${model}`)

    const response = await sideQuery({
      model,
      system: SESSION_SEARCH_SYSTEM_PROMPT,
      messages: [{ role: 'user', content: userMessage }],
      signal,
      querySource: 'session_search',
    })

    // Extract the text content from the response
    const textContent = response.content.find(block => block.type === 'text')
    if (!textContent || textContent.type !== 'text') {
      logForDebugging('No text content in agentic search response')
      return []
    }

    // Debug: log the response
    logForDebugging(`Agentic search response: ${textContent.text}`)

    // Indices are relative to logsWithTranscripts
    const relevantIndices = parseRelevantIndices(
      textContent.text,
      logsWithTranscripts.length,
    )
    const relevantLogs = relevantIndices.flatMap(index => {
      const log = logsWithTranscripts[index]
      return log ? [log] : []
    })

    logForDebugging(
      `Agentic search found ${relevantLogs.length} relevant sessions`,
    )

    return relevantLogs
  } catch (error) {
    logError(toError(error))
    logForDebugging(`Agentic search error: ${error}`)
    return []
  }
}