// source dump of claude code
// at main 307 lines 10 kB view raw
import type { LogOption, SerializedMessage } from '../types/logs.js'
import { count } from './array.js'
import { logForDebugging } from './debug.js'
import { getLogDisplayTitle, logError } from './log.js'
import { getSmallFastModel } from './model/model.js'
import { isLiteLog, loadFullLog } from './sessionStorage.js'
import { sideQuery } from './sideQuery.js'
import { jsonParse } from './slowOperations.js'

// Limits for transcript extraction
const MAX_TRANSCRIPT_CHARS = 2000 // Max chars of transcript per session
const MAX_MESSAGES_TO_SCAN = 100 // Max messages scanned per session (half from the start, half from the end)
const MAX_SESSIONS_TO_SEARCH = 100 // Max sessions to send to the API
const MAX_FIRST_PROMPT_CHARS = 300 // Max chars of the first prompt per session

const SESSION_SEARCH_SYSTEM_PROMPT = `Your goal is to find relevant sessions based on a user's search query.

You will be given a list of sessions with their metadata and a search query. Identify which sessions are most relevant to the query.

Each session may include:
- Title (display name or custom title)
- Tag (user-assigned category, shown as [tag: name] - users tag sessions with /tag command to categorize them)
- Branch (git branch name, shown as [branch: name])
- Summary (AI-generated summary)
- First message (beginning of the conversation)
- Transcript (excerpt of conversation content)

IMPORTANT: Tags are user-assigned labels that indicate the session's topic or category. If the query matches a tag exactly or partially, those sessions should be highly prioritized.

For each session, consider (in order of priority):
1. Exact tag matches (highest priority - user explicitly categorized this session)
2. Partial tag matches or tag-related terms
3. Title matches (custom titles or first message content)
4. Branch name matches
5. Summary and transcript content matches
6. Semantic similarity and related concepts

CRITICAL: Be VERY inclusive in your matching. Include sessions that:
- Contain the query term anywhere in any field
- Are semantically related to the query (e.g., "testing" matches sessions about "tests", "unit tests", "QA", etc.)
- Discuss topics that could be related to the query
- Have transcripts that mention the concept even in passing

When in doubt, INCLUDE the session. It's better to return too many results than too few. The user can easily scan through results, but missing relevant sessions is frustrating.

Return sessions ordered by relevance (most relevant first). If truly no sessions have ANY connection to the query, return an empty array - but this should be rare.

Respond with ONLY the JSON object, no markdown formatting:
{"relevant_indices": [2, 5, 0]}`

/**
 * Shape the system prompt asks the model to respond with. The response is
 * model output, so it is validated at runtime before use (see
 * parseRelevantIndices) rather than trusted to match this type.
 */
type AgenticSearchResult = {
  relevant_indices: number[]
}

/**
 * Returns at most `maxChars` UTF-16 code units of `text`, backing off by one
 * unit when the cut would fall between the two halves of a surrogate pair so
 * the result never ends in a lone high surrogate.
 */
function truncateText(text: string, maxChars: number): string {
  if (text.length <= maxChars) return text

  const lastKept = text.charCodeAt(maxChars - 1)
  const endsOnHighSurrogate = lastKept >= 0xd800 && lastKept <= 0xdbff
  return text.slice(0, endsOnHighSurrogate ? maxChars - 1 : maxChars)
}

/**
 * Returns true when the log carries at least one message.
 */
function hasMessages(log: LogOption): boolean {
  return (log.messages?.length ?? 0) > 0
}

/**
 * Extracts searchable text content from a message.
 *
 * Only 'user' and 'assistant' messages contribute text. String content is
 * returned as-is; array content is reduced to its string entries and the
 * `text` of blocks that have one, joined with single spaces. Everything else
 * yields an empty string.
 */
function extractMessageText(message: SerializedMessage): string {
  if (message.type !== 'user' && message.type !== 'assistant') {
    return ''
  }

  const content = 'message' in message ? message.message?.content : undefined
  if (!content) return ''

  if (typeof content === 'string') {
    return content
  }

  if (Array.isArray(content)) {
    return content
      .map(block => {
        if (typeof block === 'string') return block
        if ('text' in block && typeof block.text === 'string') return block.text
        return ''
      })
      .filter(Boolean)
      .join(' ')
  }

  return ''
}

/**
 * Extracts a truncated transcript from session messages.
 *
 * When there are more than MAX_MESSAGES_TO_SCAN messages, only the first and
 * last halves of that budget are scanned. The extracted text has whitespace
 * runs collapsed to single spaces and is capped at MAX_TRANSCRIPT_CHARS, with
 * an ellipsis appended when it was cut.
 */
function extractTranscript(messages: SerializedMessage[]): string {
  if (messages.length === 0) return ''

  let messagesToScan = messages
  if (messages.length > MAX_MESSAGES_TO_SCAN) {
    const half = Math.floor(MAX_MESSAGES_TO_SCAN / 2)
    // Explicit start index instead of slice(-half): slice(-0) would return
    // the whole array if the budget were ever lowered below 2.
    messagesToScan = [
      ...messages.slice(0, half),
      ...messages.slice(messages.length - half),
    ]
  }

  const text = messagesToScan
    .map(extractMessageText)
    .filter(Boolean)
    .join(' ')
    .replace(/\s+/g, ' ')
    .trim()

  return text.length > MAX_TRANSCRIPT_CHARS
    ? truncateText(text, MAX_TRANSCRIPT_CHARS) + '…'
    : text
}

/**
 * Checks if a log contains the query term in any searchable field.
 *
 * `queryLower` must already be lower-cased and non-empty (an empty string is
 * a substring of everything). Metadata fields are checked first; the
 * transcript is only built when none of them match.
 */
function logContainsQuery(log: LogOption, queryLower: string): boolean {
  const metadataFields = [
    getLogDisplayTitle(log),
    log.customTitle,
    log.tag,
    log.gitBranch,
    log.summary,
    log.firstPrompt,
  ]
  if (
    metadataFields.some(field => field?.toLowerCase().includes(queryLower))
  ) {
    return true
  }

  // Check transcript (more expensive, do last)
  if (log.messages && log.messages.length > 0) {
    return extractTranscript(log.messages).toLowerCase().includes(queryLower)
  }

  return false
}

/**
 * Picks up to MAX_SESSIONS_TO_SEARCH logs: literal matches first (in input
 * order), then non-matching logs (in input order) to fill remaining slots.
 * Each log is tested exactly once.
 */
function selectLogsToSearch(
  logs: LogOption[],
  queryLower: string,
): { logsToSearch: LogOption[]; matchCount: number } {
  const matching: LogOption[] = []
  const nonMatching: LogOption[] = []
  for (const log of logs) {
    if (logContainsQuery(log, queryLower)) {
      matching.push(log)
    } else {
      nonMatching.push(log)
    }
  }

  if (matching.length >= MAX_SESSIONS_TO_SEARCH) {
    return {
      logsToSearch: matching.slice(0, MAX_SESSIONS_TO_SEARCH),
      matchCount: matching.length,
    }
  }

  const remainingSlots = MAX_SESSIONS_TO_SEARCH - matching.length
  return {
    logsToSearch: [...matching, ...nonMatching.slice(0, remainingSlots)],
    matchCount: matching.length,
  }
}

/**
 * Formats one session as a single prompt line: "<index>: <title> [tags…] - details…".
 */
function formatSessionEntry(log: LogOption, index: number): string {
  const parts: string[] = [`${index}:`]

  // Title (display title, may be custom or from first prompt)
  const displayTitle = getLogDisplayTitle(log)
  parts.push(displayTitle)

  // Custom title if different from display title
  if (log.customTitle && log.customTitle !== displayTitle) {
    parts.push(`[custom title: ${log.customTitle}]`)
  }

  if (log.tag) {
    parts.push(`[tag: ${log.tag}]`)
  }

  if (log.gitBranch) {
    parts.push(`[branch: ${log.gitBranch}]`)
  }

  if (log.summary) {
    parts.push(`- Summary: ${log.summary}`)
  }

  // First prompt content (truncated)
  if (log.firstPrompt && log.firstPrompt !== 'No prompt') {
    parts.push(
      `- First message: ${truncateText(log.firstPrompt, MAX_FIRST_PROMPT_CHARS)}`,
    )
  }

  // Transcript excerpt (if messages are available)
  if (log.messages && log.messages.length > 0) {
    const transcript = extractTranscript(log.messages)
    if (transcript) {
      parts.push(`- Transcript: ${transcript}`)
    }
  }

  return parts.join(' ')
}

/**
 * Extracts the ranked session indices from the model's response text.
 *
 * Keeps only integer indices in [0, sessionCount), in the order given, and
 * drops repeats so a session is never returned twice. Returns an empty array
 * when no JSON object is found or `relevant_indices` is not an array.
 * Propagates whatever jsonParse throws on malformed JSON.
 */
function parseRelevantIndices(
  responseText: string,
  sessionCount: number,
): number[] {
  const jsonMatch = responseText.match(/\{[\s\S]*\}/)
  if (!jsonMatch) {
    logForDebugging('Could not find JSON in agentic search response')
    return []
  }

  const parsed: unknown = jsonParse(jsonMatch[0])
  const candidates =
    typeof parsed === 'object' && parsed !== null
      ? (parsed as Partial<Record<keyof AgenticSearchResult, unknown>>)
          .relevant_indices
      : undefined
  if (!Array.isArray(candidates)) {
    return []
  }

  const items: unknown[] = candidates
  const seen = new Set<number>()
  const indices: number[] = []
  for (const item of items) {
    if (
      typeof item === 'number' &&
      Number.isInteger(item) &&
      item >= 0 &&
      item < sessionCount &&
      !seen.has(item)
    ) {
      seen.add(item)
      indices.push(item)
    }
  }
  return indices
}

/**
 * Performs an agentic search using Claude to find relevant sessions
 * based on semantic understanding of the query.
 *
 * Up to MAX_SESSIONS_TO_SEARCH sessions are described to the model returned
 * by getSmallFastModel(); sessions that literally contain the query are
 * preferred, and remaining slots are filled with other sessions in input
 * order. Lite logs are loaded in full first so their transcripts can be
 * included; a log that fails to load is searched with its metadata only.
 *
 * @param query - Free-text search query. Surrounding whitespace is ignored
 *   for the literal pre-filter.
 * @param logs - Candidate sessions.
 * @param signal - Optional abort signal forwarded to the model request.
 * @returns The sessions the model judged relevant, in the order it returned
 *   them. Resolves to an empty array for a blank query, an empty `logs`
 *   list, an unusable model response, or any error during the request
 *   (errors are logged, not thrown).
 */
export async function agenticSessionSearch(
  query: string,
  logs: LogOption[],
  signal?: AbortSignal,
): Promise<LogOption[]> {
  const trimmedQuery = query.trim()
  if (!trimmedQuery || logs.length === 0) {
    return []
  }

  // Pre-filter: find sessions that contain the query term
  // This ensures we search relevant sessions, not just recent ones
  const { logsToSearch, matchCount } = selectLogsToSearch(
    logs,
    trimmedQuery.toLowerCase(),
  )

  // Debug: log what data we have
  logForDebugging(
    `Agentic search: ${logsToSearch.length}/${logs.length} logs, query="${query}", ` +
      `matching: ${matchCount}, with messages: ${count(logsToSearch, hasMessages)}`,
  )

  // Load full logs for lite logs to get transcript content
  const logsWithTranscripts = await Promise.all(
    logsToSearch.map(async log => {
      if (!isLiteLog(log)) return log
      try {
        return await loadFullLog(log)
      } catch (error) {
        logError(error as Error)
        // If loading fails, use the lite log (no transcript)
        return log
      }
    }),
  )

  logForDebugging(
    `Agentic search: loaded ${count(logsWithTranscripts, hasMessages)}/${logsToSearch.length} logs with transcripts`,
  )

  // Build session list for the prompt with all searchable metadata
  const sessionList = logsWithTranscripts.map(formatSessionEntry).join('\n')

  const userMessage = `Sessions:
${sessionList}

Search query: "${query}"

Find the sessions that are most relevant to this query.`

  // Debug: log first part of the session list
  logForDebugging(
    `Agentic search prompt (first 500 chars): ${userMessage.slice(0, 500)}...`,
  )

  try {
    const model = getSmallFastModel()
    logForDebugging(`Agentic search using model: ${model}`)

    const response = await sideQuery({
      model,
      system: SESSION_SEARCH_SYSTEM_PROMPT,
      messages: [{ role: 'user', content: userMessage }],
      signal,
      querySource: 'session_search',
    })

    // Extract the text content from the response
    const textContent = response.content.find(block => block.type === 'text')
    if (!textContent || textContent.type !== 'text') {
      logForDebugging('No text content in agentic search response')
      return []
    }

    // Debug: log the response
    logForDebugging(`Agentic search response: ${textContent.text}`)

    // Map indices back to logs (indices are relative to logsWithTranscripts)
    const relevantLogs = parseRelevantIndices(
      textContent.text,
      logsWithTranscripts.length,
    ).flatMap(index => {
      const log = logsWithTranscripts[index]
      return log ? [log] : []
    })

    logForDebugging(
      `Agentic search found ${relevantLogs.length} relevant sessions`,
    )

    return relevantLogs
  } catch (error) {
    logError(error as Error)
    logForDebugging(`Agentic search error: ${error}`)
    return []
  }
}