utils/attribution.ts at main · oppi.li/claude-code

oppi.li / claude-code
fork atom
source dump of claude code
fork atom
claude-code / utils / attribution.ts
at main 393 lines 13 kB view raw
wrap content
oppi.li dump from zip 2d ago
63aada3f
  1import { feature } from 'bun:bundle'
  2import { stat } from 'fs/promises'
  3import { getClientType } from '../bootstrap/state.js'
  4import {
  5  getRemoteSessionUrl,
  6  isRemoteSessionLocal,
  7  PRODUCT_URL,
  8} from '../constants/product.js'
  9import { TERMINAL_OUTPUT_TAGS } from '../constants/xml.js'
 10import type { AppState } from '../state/AppState.js'
 11import { FILE_EDIT_TOOL_NAME } from '../tools/FileEditTool/constants.js'
 12import { FILE_READ_TOOL_NAME } from '../tools/FileReadTool/prompt.js'
 13import { FILE_WRITE_TOOL_NAME } from '../tools/FileWriteTool/prompt.js'
 14import { GLOB_TOOL_NAME } from '../tools/GlobTool/prompt.js'
 15import { GREP_TOOL_NAME } from '../tools/GrepTool/prompt.js'
 16import type { Entry } from '../types/logs.js'
 17import {
 18  type AttributionData,
 19  calculateCommitAttribution,
 20  isInternalModelRepo,
 21  isInternalModelRepoCached,
 22  sanitizeModelName,
 23} from './commitAttribution.js'
 24import { logForDebugging } from './debug.js'
 25import { parseJSONL } from './json.js'
 26import { logError } from './log.js'
 27import {
 28  getCanonicalName,
 29  getMainLoopModel,
 30  getPublicModelDisplayName,
 31  getPublicModelName,
 32} from './model/model.js'
 33import { isMemoryFileAccess } from './sessionFileAccessHooks.js'
 34import { getTranscriptPath } from './sessionStorage.js'
 35import { readTranscriptForLoad } from './sessionStoragePortable.js'
 36import { getInitialSettings } from './settings/settings.js'
 37import { isUndercover } from './undercover.js'
 38
 39export type AttributionTexts = {
 40  commit: string
 41  pr: string
 42}
 43
 44/**
 45 * Returns attribution text for commits and PRs based on user settings.
 46 * Handles:
 47 * - Dynamic model name via getPublicModelName()
 48 * - Custom attribution settings (settings.attribution.commit/pr)
 49 * - Backward compatibility with deprecated includeCoAuthoredBy setting
 50 * - Remote mode: returns session URL for attribution
 51 */
 52export function getAttributionTexts(): AttributionTexts {
 53  if (process.env.USER_TYPE === 'ant' && isUndercover()) {
 54    return { commit: '', pr: '' }
 55  }
 56
 57  if (getClientType() === 'remote') {
 58    const remoteSessionId = process.env.CLAUDE_CODE_REMOTE_SESSION_ID
 59    if (remoteSessionId) {
 60      const ingressUrl = process.env.SESSION_INGRESS_URL
 61      // Skip for local dev - URLs won't persist
 62      if (!isRemoteSessionLocal(remoteSessionId, ingressUrl)) {
 63        const sessionUrl = getRemoteSessionUrl(remoteSessionId, ingressUrl)
 64        return { commit: sessionUrl, pr: sessionUrl }
 65      }
 66    }
 67    return { commit: '', pr: '' }
 68  }
 69
 70  // @[MODEL LAUNCH]: Update the hardcoded fallback model name below (guards against codename leaks).
 71  // For internal repos, use the real model name. For external repos,
 72  // fall back to "Claude Opus 4.6" for unrecognized models to avoid leaking codenames.
 73  const model = getMainLoopModel()
 74  const isKnownPublicModel = getPublicModelDisplayName(model) !== null
 75  const modelName =
 76    isInternalModelRepoCached() || isKnownPublicModel
 77      ? getPublicModelName(model)
 78      : 'Claude Opus 4.6'
 79  const defaultAttribution = `🤖 Generated with [Claude Code](${PRODUCT_URL})`
 80  const defaultCommit = `Co-Authored-By: ${modelName} <noreply@anthropic.com>`
 81
 82  const settings = getInitialSettings()
 83
 84  // New attribution setting takes precedence over deprecated includeCoAuthoredBy
 85  if (settings.attribution) {
 86    return {
 87      commit: settings.attribution.commit ?? defaultCommit,
 88      pr: settings.attribution.pr ?? defaultAttribution,
 89    }
 90  }
 91
 92  // Backward compatibility: deprecated includeCoAuthoredBy setting
 93  if (settings.includeCoAuthoredBy === false) {
 94    return { commit: '', pr: '' }
 95  }
 96
 97  return { commit: defaultCommit, pr: defaultAttribution }
 98}
 99
/**
 * Tells whether a message content string is terminal output rather than a
 * user prompt, i.e. whether it contains the opening form (`<tag>`) of any tag
 * listed in TERMINAL_OUTPUT_TAGS.
 */
function isTerminalOutput(content: string): boolean {
  let found = false
  for (const tagName of TERMINAL_OUTPUT_TAGS) {
    const openingTag = `<${tagName}>`
    if (content.includes(openingTag)) {
      found = true
      break
    }
  }
  return found
}
112
/**
 * Count user messages with visible content in a list of non-sidechain messages.
 *
 * A message is counted when its `type` is `'user'` and its content is either:
 * - a string that is non-blank and is not terminal output, or
 * - an array containing at least one `image` block, `document` block, or
 *   `text` block whose text is non-blank and is not terminal output.
 *
 * Other block types (e.g. tool_result), terminal output, and empty messages
 * are not counted. Blank text is rejected the same way for string content and
 * for text blocks, so an array holding only an empty text block is treated as
 * an empty message.
 *
 * Callers should pass messages already filtered to exclude sidechain messages.
 *
 * @param messages Messages to inspect; entries whose type is not 'user' are ignored.
 * @returns Number of messages that qualify as user prompts.
 */
export function countUserPromptsInMessages(
  messages: ReadonlyArray<{ type: string; message?: { content?: unknown } }>,
): number {
  // Blank check comes first so empty text never reaches the tag scan.
  const isVisiblePromptText = (text: string): boolean =>
    text.trim().length > 0 && !isTerminalOutput(text)

  let count = 0

  for (const message of messages) {
    if (message.type !== 'user') {
      continue
    }

    const content = message.message?.content
    if (!content) {
      continue
    }

    let hasUserContent = false

    if (typeof content === 'string') {
      hasUserContent = isVisiblePromptText(content)
    } else if (Array.isArray(content)) {
      hasUserContent = content.some(block => {
        if (!block || typeof block !== 'object' || !('type' in block)) {
          return false
        }
        if (block.type === 'image' || block.type === 'document') {
          return true
        }
        return (
          block.type === 'text' &&
          typeof block.text === 'string' &&
          isVisiblePromptText(block.text)
        )
      })
    }

    if (hasUserContent) {
      count++
    }
  }

  return count
}
163
/**
 * Counts user prompts in transcript entries, ignoring sidechain traffic.
 * The result feeds the "N-shotted" figure of the PR attribution.
 *
 * Only entries of type 'user' that are not flagged `isSidechain` are handed to
 * countUserPromptsInMessages, which in turn decides whether each one carries
 * user-visible content.
 */
function countUserPromptsFromEntries(entries: ReadonlyArray<Entry>): number {
  const mainChainUserEntries = entries.filter(entry => {
    if (entry.type !== 'user') {
      return false
    }
    const onSidechain = 'isSidechain' in entry && entry.isSidechain
    return !onSidechain
  })
  return countUserPromptsInMessages(mainChainUserEntries)
}
178
/**
 * Computes attribution data for a PR from the attribution state held in the
 * given AppState.
 *
 * Every file tracked in the attribution state is included (not just staged
 * files), because files may not be staged yet when a PR is being prepared.
 *
 * @returns The calculated attribution, or null when there is no attribution
 *   state, no tracked file, or the calculation throws (the error is logged).
 */
async function getPRAttributionData(
  appState: AppState,
): Promise<AttributionData | null> {
  const { attribution } = appState
  if (!attribution) {
    return null
  }

  // fileStates may be a Map or a plain object (in case of serialization).
  const { fileStates } = attribution
  let trackedPaths: string[]
  if (fileStates instanceof Map) {
    trackedPaths = Array.from(fileStates.keys())
  } else {
    trackedPaths = Object.keys(fileStates)
  }

  if (trackedPaths.length === 0) {
    return null
  }

  try {
    return await calculateCommitAttribution([attribution], trackedPaths)
  } catch (err) {
    logError(err as Error)
    return null
  }
}
212
/**
 * Names of the tools whose tool_use blocks are examined when counting memory
 * file accesses: the read, grep and glob tools plus the edit and write tools.
 */
const MEMORY_ACCESS_TOOL_NAMES = new Set([
  // Read-side tools
  FILE_READ_TOOL_NAME,
  GREP_TOOL_NAME,
  GLOB_TOOL_NAME,
  // Write-side tools
  FILE_EDIT_TOOL_NAME,
  FILE_WRITE_TOOL_NAME,
])
220
/**
 * Tallies memory file accesses found in transcript entries.
 *
 * Walks the content blocks of every assistant entry and counts each tool_use
 * block whose tool is in MEMORY_ACCESS_TOOL_NAMES and for which
 * isMemoryFileAccess(name, input) is true. Entries of other types, and
 * assistant entries whose content is not an array, contribute nothing.
 */
function countMemoryFileAccessFromEntries(
  entries: ReadonlyArray<Entry>,
): number {
  let total = 0
  for (const entry of entries) {
    if (entry.type !== 'assistant') {
      continue
    }
    const blocks = entry.message?.content
    if (!Array.isArray(blocks)) {
      continue
    }
    for (const block of blocks) {
      // Cheap checks first; isMemoryFileAccess only runs for candidate tools.
      if (
        block.type === 'tool_use' &&
        MEMORY_ACCESS_TOOL_NAMES.has(block.name) &&
        isMemoryFileAccess(block.name, block.input)
      ) {
        total += 1
      }
    }
  }
  return total
}
244
/**
 * Read session transcript entries and compute prompt count and memory access
 * count. Pre-compact entries are skipped — the N-shot count and memory-access
 * count should reflect only the current conversation arc, not accumulated
 * prompts from before a compaction boundary.
 *
 * This is best-effort: any failure while locating, reading or parsing the
 * transcript yields zero counts instead of throwing. The failure reason is
 * written to the debug log so that an unexpected "no stats" result can be
 * diagnosed.
 *
 * @returns Prompt and memory-access counts for the entries after the last
 *   compact boundary (or for all entries when there is no boundary).
 */
async function getTranscriptStats(): Promise<{
  promptCount: number
  memoryAccessCount: number
}> {
  try {
    const filePath = getTranscriptPath()
    const fileSize = (await stat(filePath)).size
    // Fused reader: attr-snap lines (84% of a long session by bytes) are
    // skipped at the fd level so peak scales with output, not file size. The
    // one surviving attr-snap at EOF is a no-op for the count functions
    // (neither checks type === 'attribution-snapshot'). When the last
    // boundary has preservedSegment the reader returns full (no truncate);
    // the findLastIndex below still slices to post-boundary.
    const scan = await readTranscriptForLoad(filePath, fileSize)
    const buf = scan.postBoundaryBuf
    const entries = parseJSONL<Entry>(buf)
    const lastBoundaryIdx = entries.findLastIndex(
      e =>
        e.type === 'system' &&
        'subtype' in e &&
        e.subtype === 'compact_boundary',
    )
    const postBoundary =
      lastBoundaryIdx >= 0 ? entries.slice(lastBoundaryIdx + 1) : entries
    return {
      promptCount: countUserPromptsFromEntries(postBoundary),
      memoryAccessCount: countMemoryFileAccessFromEntries(postBoundary),
    }
  } catch (error) {
    // Deliberately non-fatal; record why the stats are unavailable rather
    // than discarding the error without a trace.
    const reason = error instanceof Error ? error.message : String(error)
    logForDebugging(
      `PR Attribution: failed to compute transcript stats: ${reason}`,
    )
    return { promptCount: 0, memoryAccessCount: 0 }
  }
}
283
/**
 * Get enhanced PR attribution text with Claude contribution stats.
 *
 * Format: "🤖 Generated with Claude Code (93% 3-shotted by claude-opus-4-5, 2 memories recalled)"
 *
 * Rules:
 * - Undercover ant users get an empty string.
 * - Remote clients get the remote session URL (empty when there is no session
 *   id or the session is local).
 * - A custom `settings.attribution.pr` string is returned verbatim. This
 *   includes the empty string, so an explicit `pr: ''` suppresses PR
 *   attribution here exactly as it does in getAttributionTexts().
 * - The deprecated `includeCoAuthoredBy: false` yields an empty string.
 * - Shows Claude contribution percentage from commit attribution
 * - Shows N-shotted where N is the prompt count (1-shotted, 2-shotted, etc.)
 * - Shows short model name (e.g., claude-opus-4-5), sanitized outside
 *   internal model repos
 * - Appends ", N memory/memories recalled" when memory files were accessed
 * - Returns default attribution if stats can't be computed
 *
 * @param getAppState Function to get the current AppState (from command context)
 * @returns The PR attribution text, possibly empty.
 */
export async function getEnhancedPRAttribution(
  getAppState: () => AppState,
): Promise<string> {
  if (process.env.USER_TYPE === 'ant' && isUndercover()) {
    return ''
  }

  if (getClientType() === 'remote') {
    const remoteSessionId = process.env.CLAUDE_CODE_REMOTE_SESSION_ID
    if (remoteSessionId) {
      const ingressUrl = process.env.SESSION_INGRESS_URL
      // Skip for local dev - URLs won't persist
      if (!isRemoteSessionLocal(remoteSessionId, ingressUrl)) {
        return getRemoteSessionUrl(remoteSessionId, ingressUrl)
      }
    }
    return ''
  }

  const settings = getInitialSettings()

  // If user has custom PR attribution, use that. Test for a string rather than
  // truthiness: an empty string is a deliberate "no PR attribution" and must
  // not fall through to the generated text below.
  const customPrAttribution = settings.attribution?.pr
  if (typeof customPrAttribution === 'string') {
    return customPrAttribution
  }

  // Backward compatibility: deprecated includeCoAuthoredBy setting
  if (settings.includeCoAuthoredBy === false) {
    return ''
  }

  const defaultAttribution = `🤖 Generated with [Claude Code](${PRODUCT_URL})`

  // Get AppState first
  const appState = getAppState()

  logForDebugging(
    `PR Attribution: appState.attribution exists: ${!!appState.attribution}`,
  )
  if (appState.attribution) {
    // fileStates may be a Map or a plain object (in case of serialization).
    const fileStates = appState.attribution.fileStates
    const isMap = fileStates instanceof Map
    const fileCount = isMap ? fileStates.size : Object.keys(fileStates).length
    logForDebugging(`PR Attribution: fileStates count: ${fileCount}`)
  }

  // Get attribution stats (transcript is read once for both prompt count and memory access)
  const [attributionData, { promptCount, memoryAccessCount }, isInternal] =
    await Promise.all([
      getPRAttributionData(appState),
      getTranscriptStats(),
      isInternalModelRepo(),
    ])

  const claudePercent = attributionData?.summary.claudePercent ?? 0

  logForDebugging(
    `PR Attribution: claudePercent: ${claudePercent}, promptCount: ${promptCount}, memoryAccessCount: ${memoryAccessCount}`,
  )

  // Get short model name, sanitized for non-internal repos
  const rawModelName = getCanonicalName(getMainLoopModel())
  const shortModelName = isInternal
    ? rawModelName
    : sanitizeModelName(rawModelName)

  // If no attribution data, return default
  if (claudePercent === 0 && promptCount === 0 && memoryAccessCount === 0) {
    logForDebugging('PR Attribution: returning default (no data)')
    return defaultAttribution
  }

  // Build the enhanced attribution: "🤖 Generated with Claude Code (93% 3-shotted by claude-opus-4-5, 2 memories recalled)"
  const memSuffix =
    memoryAccessCount > 0
      ? `, ${memoryAccessCount} ${memoryAccessCount === 1 ? 'memory' : 'memories'} recalled`
      : ''
  const summary = `🤖 Generated with [Claude Code](${PRODUCT_URL}) (${claudePercent}% ${promptCount}-shotted by ${shortModelName}${memSuffix})`

  // Append trailer lines for squash-merge survival. Only for allowlisted repos
  // (INTERNAL_MODEL_REPOS) and only in builds with COMMIT_ATTRIBUTION enabled —
  // attributionTrailer.ts contains excluded strings, so reach it via dynamic
  // import behind feature(). When the repo is configured with
  // squash_merge_commit_message=PR_BODY (cli, apps), the PR body becomes the
  // squash commit body verbatim — trailer lines at the end become proper git
  // trailers on the squash commit.
  if (feature('COMMIT_ATTRIBUTION') && isInternal && attributionData) {
    const { buildPRTrailers } = await import('./attributionTrailer.js')
    const trailers = buildPRTrailers(attributionData, appState.attribution)
    const result = `${summary}\n\n${trailers.join('\n')}`
    logForDebugging(`PR Attribution: returning with trailers: ${result}`)
    return result
  }

  logForDebugging(`PR Attribution: returning summary: ${summary}`)
  return summary
}