memdir/memdir.ts at main · nonbinary.computer/claude-code

nonbinary.computer / claude-code
forked from oppi.li/claude-code
fork atom
source dump of claude code
fork atom
claude-code / memdir / memdir.ts
at main 507 lines 21 kB view raw
wrap content
oppi.li dump from zip 11d ago
63aada3f
  1import { feature } from 'bun:bundle'
  2import { join } from 'path'
  3import { getFsImplementation } from '../utils/fsOperations.js'
  4import { getAutoMemPath, isAutoMemoryEnabled } from './paths.js'
  5
  6/* eslint-disable @typescript-eslint/no-require-imports */
  7const teamMemPaths = feature('TEAMMEM')
  8  ? (require('./teamMemPaths.js') as typeof import('./teamMemPaths.js'))
  9  : null
 10
 11import { getKairosActive, getOriginalCwd } from '../bootstrap/state.js'
 12import { getFeatureValue_CACHED_MAY_BE_STALE } from '../services/analytics/growthbook.js'
 13/* eslint-enable @typescript-eslint/no-require-imports */
 14import {
 15  type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
 16  logEvent,
 17} from '../services/analytics/index.js'
 18import { GREP_TOOL_NAME } from '../tools/GrepTool/prompt.js'
 19import { isReplModeEnabled } from '../tools/REPLTool/constants.js'
 20import { logForDebugging } from '../utils/debug.js'
 21import { hasEmbeddedSearchTools } from '../utils/embeddedTools.js'
 22import { isEnvTruthy } from '../utils/envUtils.js'
 23import { formatFileSize } from '../utils/format.js'
 24import { getProjectDir } from '../utils/sessionStorage.js'
 25import { getInitialSettings } from '../utils/settings/settings.js'
 26import {
 27  MEMORY_FRONTMATTER_EXAMPLE,
 28  TRUSTING_RECALL_SECTION,
 29  TYPES_SECTION_INDIVIDUAL,
 30  WHAT_NOT_TO_SAVE_SECTION,
 31  WHEN_TO_ACCESS_SECTION,
 32} from './memoryTypes.js'
 33
 34export const ENTRYPOINT_NAME = 'MEMORY.md'
 35export const MAX_ENTRYPOINT_LINES = 200
 36// ~125 chars/line at 200 lines. At p97 today; catches long-line indexes that
 37// slip past the line cap (p100 observed: 197KB under 200 lines).
 38export const MAX_ENTRYPOINT_BYTES = 25_000
 39const AUTO_MEM_DISPLAY_NAME = 'auto memory'
 40
 41export type EntrypointTruncation = {
 42  content: string
 43  lineCount: number
 44  byteCount: number
 45  wasLineTruncated: boolean
 46  wasByteTruncated: boolean
 47}
 48
 49/**
 50 * Truncate MEMORY.md content to the line AND byte caps, appending a warning
 51 * that names which cap fired. Line-truncates first (natural boundary), then
 52 * byte-truncates at the last newline before the cap so we don't cut mid-line.
 53 *
 54 * Shared by buildMemoryPrompt and claudemd getMemoryFiles (previously
 55 * duplicated the line-only logic).
 56 */
 57export function truncateEntrypointContent(raw: string): EntrypointTruncation {
 58  const trimmed = raw.trim()
 59  const contentLines = trimmed.split('\n')
 60  const lineCount = contentLines.length
 61  const byteCount = trimmed.length
 62
 63  const wasLineTruncated = lineCount > MAX_ENTRYPOINT_LINES
 64  // Check original byte count — long lines are the failure mode the byte cap
 65  // targets, so post-line-truncation size would understate the warning.
 66  const wasByteTruncated = byteCount > MAX_ENTRYPOINT_BYTES
 67
 68  if (!wasLineTruncated && !wasByteTruncated) {
 69    return {
 70      content: trimmed,
 71      lineCount,
 72      byteCount,
 73      wasLineTruncated,
 74      wasByteTruncated,
 75    }
 76  }
 77
 78  let truncated = wasLineTruncated
 79    ? contentLines.slice(0, MAX_ENTRYPOINT_LINES).join('\n')
 80    : trimmed
 81
 82  if (truncated.length > MAX_ENTRYPOINT_BYTES) {
 83    const cutAt = truncated.lastIndexOf('\n', MAX_ENTRYPOINT_BYTES)
 84    truncated = truncated.slice(0, cutAt > 0 ? cutAt : MAX_ENTRYPOINT_BYTES)
 85  }
 86
 87  const reason =
 88    wasByteTruncated && !wasLineTruncated
 89      ? `${formatFileSize(byteCount)} (limit: ${formatFileSize(MAX_ENTRYPOINT_BYTES)}) — index entries are too long`
 90      : wasLineTruncated && !wasByteTruncated
 91        ? `${lineCount} lines (limit: ${MAX_ENTRYPOINT_LINES})`
 92        : `${lineCount} lines and ${formatFileSize(byteCount)}`
 93
 94  return {
 95    content:
 96      truncated +
 97      `\n\n> WARNING: ${ENTRYPOINT_NAME} is ${reason}. Only part of it was loaded. Keep index entries to one line under ~200 chars; move detail into topic files.`,
 98    lineCount,
 99    byteCount,
100    wasLineTruncated,
101    wasByteTruncated,
102  }
103}
104
105/* eslint-disable @typescript-eslint/no-require-imports */
106const teamMemPrompts = feature('TEAMMEM')
107  ? (require('./teamMemPrompts.js') as typeof import('./teamMemPrompts.js'))
108  : null
109/* eslint-enable @typescript-eslint/no-require-imports */
110
/**
 * Guidance sentences appended to the prompt line that names a memory
 * directory. They tell the model the directory is already present so it
 * writes directly instead of spending turns on `ls`/`mkdir -p` first.
 * The harness is expected to create the directory up front
 * (see ensureMemoryDirExists()).
 */
// Single-directory variant.
export const DIR_EXISTS_GUIDANCE =
  'This directory already exists — write to it directly with the Write tool ' +
  '(do not run mkdir or check for its existence).'
// Two-directory variant.
export const DIRS_EXIST_GUIDANCE =
  'Both directories already exist — write to them directly with the Write tool ' +
  '(do not run mkdir or check for their existence).'
120
/**
 * Best-effort creation of a memory directory so the model can write into it
 * without checking for its existence first.
 *
 * Any error raised by the mkdir call is caught and reported through
 * logForDebugging at debug level; it is never rethrown, so prompt building
 * continues either way. Per the original author's notes, FsOperations.mkdir
 * is recursive and already swallows EEXIST, so whatever reaches the catch is
 * a real problem (EACCES/EPERM/EROFS) that the model's own Write will later
 * surface to the user.
 *
 * @param memoryDir - Directory path to create.
 */
export async function ensureMemoryDirExists(memoryDir: string): Promise<void> {
  const fsImpl = getFsImplementation()
  try {
    await fsImpl.mkdir(memoryDir)
  } catch (err) {
    // Prefer the errno-style string code when the error carries one;
    // otherwise fall back to the stringified error.
    let detail: string
    if (err instanceof Error && 'code' in err && typeof err.code === 'string') {
      detail = err.code
    } else {
      detail = String(err)
    }
    logForDebugging(`ensureMemoryDirExists failed for ${memoryDir}: ${detail}`, {
      level: 'debug',
    })
  }
}
148
/**
 * Emit a `tengu_memdir_loaded` analytics event for a memory directory,
 * enriched with how many files and subdirectories it directly contains.
 *
 * Fire-and-forget: the directory listing runs in the background and the
 * function returns immediately, so prompt building is never blocked. When the
 * listing fails, the event is still emitted, just without the counts.
 *
 * @param memoryDir - Directory whose direct entries are counted.
 * @param baseMetadata - Event fields to send regardless of listing outcome.
 */
function logMemoryDirCounts(
  memoryDir: string,
  baseMetadata: Record<
    string,
    | number
    | boolean
    | AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
  >,
): void {
  const listing = getFsImplementation().readdir(memoryDir)
  void listing.then(
    entries => {
      // Entries that are neither files nor directories are not counted.
      const tally = { files: 0, subdirs: 0 }
      for (const entry of entries) {
        if (entry.isFile()) {
          tally.files += 1
        } else if (entry.isDirectory()) {
          tally.subdirs += 1
        }
      }
      logEvent('tengu_memdir_loaded', {
        ...baseMetadata,
        total_file_count: tally.files,
        total_subdir_count: tally.subdirs,
      })
    },
    () => {
      // Listing failed (directory unreadable) — report without counts.
      logEvent('tengu_memdir_loaded', baseMetadata)
    },
  )
}
186
/**
 * Assemble the behavioral instructions for the typed-memory system as an
 * array of prompt lines. MEMORY.md content itself is NOT included here.
 *
 * Memories are constrained to a closed four-type taxonomy (user / feedback /
 * project / reference); content derivable from the current project state
 * (code patterns, architecture, git history) is explicitly excluded.
 *
 * This is the individual-only variant: no `## Memory scope` section, no
 * <scope> tags in type blocks, and team/private qualifiers stripped from
 * examples.
 *
 * Used by both buildMemoryPrompt (agent memory, includes content) and
 * loadMemoryPrompt (system prompt, content injected via user context instead).
 *
 * @param displayName - Title used for the top-level `# …` heading.
 * @param memoryDir - Memory directory path shown to the model.
 * @param extraGuidelines - Optional extra lines inserted before the search section.
 * @param skipIndex - When true, omit the MEMORY.md index step from "How to save".
 * @returns The prompt as individual lines (callers join with '\n').
 */
export function buildMemoryLines(
  displayName: string,
  memoryDir: string,
  extraGuidelines?: string[],
  skipIndex = false,
): string[] {
  // Upkeep bullets shared by both "How to save memories" variants.
  const upkeepBullets = [
    '- Keep the name, description, and type fields in memory files up-to-date with the content',
    '- Organize memory semantically by topic, not chronologically',
    '- Update or remove memories that turn out to be wrong or outdated',
    '- Do not write duplicate memories. First check if there is an existing memory you can update before writing a new one.',
  ]

  let howToSave: string[]
  if (skipIndex) {
    // Single-step variant: write the memory file, no index pointer.
    howToSave = [
      '## How to save memories',
      '',
      'Write each memory to its own file (e.g., `user_role.md`, `feedback_testing.md`) using this frontmatter format:',
      '',
      ...MEMORY_FRONTMATTER_EXAMPLE,
      '',
      ...upkeepBullets,
    ]
  } else {
    // Two-step variant: write the memory file, then index it in MEMORY.md.
    howToSave = [
      '## How to save memories',
      '',
      'Saving a memory is a two-step process:',
      '',
      '**Step 1** — write the memory to its own file (e.g., `user_role.md`, `feedback_testing.md`) using this frontmatter format:',
      '',
      ...MEMORY_FRONTMATTER_EXAMPLE,
      '',
      `**Step 2** — add a pointer to that file in \`${ENTRYPOINT_NAME}\`. \`${ENTRYPOINT_NAME}\` is an index, not a memory — each entry should be one line, under ~150 characters: \`- [Title](file.md) — one-line hook\`. It has no frontmatter. Never write memory content directly into \`${ENTRYPOINT_NAME}\`.`,
      '',
      `- \`${ENTRYPOINT_NAME}\` is always loaded into your conversation context — lines after ${MAX_ENTRYPOINT_LINES} will be truncated, so keep the index concise`,
      ...upkeepBullets,
    ]
  }

  return [
    `# ${displayName}`,
    '',
    `You have a persistent, file-based memory system at \`${memoryDir}\`. ${DIR_EXISTS_GUIDANCE}`,
    '',
    "You should build up this memory system over time so that future conversations can have a complete picture of who the user is, how they'd like to collaborate with you, what behaviors to avoid or repeat, and the context behind the work the user gives you.",
    '',
    'If the user explicitly asks you to remember something, save it immediately as whichever type fits best. If they ask you to forget something, find and remove the relevant entry.',
    '',
    ...TYPES_SECTION_INDIVIDUAL,
    ...WHAT_NOT_TO_SAVE_SECTION,
    '',
    ...howToSave,
    '',
    ...WHEN_TO_ACCESS_SECTION,
    '',
    ...TRUSTING_RECALL_SECTION,
    '',
    '## Memory and other forms of persistence',
    'Memory is one of several persistence mechanisms available to you as you assist the user in a given conversation. The distinction is often that memory can be recalled in future conversations and should not be used for persisting information that is only useful within the scope of the current conversation.',
    '- When to use or update a plan instead of memory: If you are about to start a non-trivial implementation task and would like to reach alignment with the user on your approach you should use a Plan rather than saving this information to memory. Similarly, if you already have a plan within the conversation and you have changed your approach persist that change by updating the plan rather than saving a memory.',
    '- When to use or update tasks instead of memory: When you need to break your work in current conversation into discrete steps or keep track of your progress use tasks instead of saving to memory. Tasks are great for persisting information about the work that needs to be done in the current conversation, but memory should be reserved for information that will be useful in future conversations.',
    '',
    ...(extraGuidelines ?? []),
    '',
    // Optional, feature-gated section; contributes no lines when disabled.
    ...buildSearchingPastContextSection(memoryDir),
  ]
}
267
/**
 * Build the typed-memory prompt with MEMORY.md content included.
 * Used by agent memory (which has no getClaudeMds() equivalent).
 *
 * The entrypoint path is built with path.join, so `memoryDir` may be given
 * with or without a trailing separator. A missing or unreadable entrypoint is
 * treated the same as an empty one.
 *
 * @param params.displayName - Heading for the prompt; also decides whether
 *   telemetry is tagged 'auto' (matches the auto-memory name) or 'agent'.
 * @param params.memoryDir - Memory directory containing the entrypoint file.
 * @param params.extraGuidelines - Optional extra guideline lines.
 * @returns The complete prompt as one newline-joined string.
 */
export function buildMemoryPrompt(params: {
  displayName: string
  memoryDir: string
  extraGuidelines?: string[]
}): string {
  const { displayName, memoryDir, extraGuidelines } = params
  const fs = getFsImplementation()
  // join (not string concatenation) so a memoryDir without a trailing
  // separator does not silently resolve to "<dir>MEMORY.md".
  const entrypoint = join(memoryDir, ENTRYPOINT_NAME)

  // Directory creation is the caller's responsibility (loadMemoryPrompt /
  // loadAgentMemoryPrompt). Builders only read, they don't mkdir.

  // Read existing memory entrypoint (sync: prompt building is synchronous)
  let entrypointContent = ''
  try {
    // eslint-disable-next-line custom-rules/no-sync-fs
    entrypointContent = fs.readFileSync(entrypoint, { encoding: 'utf-8' })
  } catch {
    // No memory file yet
  }

  const lines = buildMemoryLines(displayName, memoryDir, extraGuidelines)

  if (entrypointContent.trim()) {
    const t = truncateEntrypointContent(entrypointContent)
    const memoryType = displayName === AUTO_MEM_DISPLAY_NAME ? 'auto' : 'agent'
    // Telemetry is only emitted when there is content to report on.
    logMemoryDirCounts(memoryDir, {
      content_length: t.byteCount,
      line_count: t.lineCount,
      was_truncated: t.wasLineTruncated,
      was_byte_truncated: t.wasByteTruncated,
      memory_type:
        memoryType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    lines.push(`## ${ENTRYPOINT_NAME}`, '', t.content)
  } else {
    lines.push(
      `## ${ENTRYPOINT_NAME}`,
      '',
      `Your ${ENTRYPOINT_NAME} is currently empty. When you save new memories, they will appear here.`,
    )
  }

  return lines.join('\n')
}
317
/**
 * Build the assistant-mode (KAIROS) daily-log memory prompt.
 *
 * Assistant sessions are effectively perpetual, so instead of maintaining
 * MEMORY.md as a live index the agent appends memories to a date-named log
 * file. A separate nightly /dream skill distills the logs into topic files
 * and MEMORY.md. MEMORY.md is still loaded into context (via claudemd.ts) as
 * the distilled index — this prompt only changes where NEW memories go.
 *
 * @param skipIndex - When true, omit the section describing MEMORY.md.
 * @returns The complete prompt as one newline-joined string.
 */
function buildAssistantDailyLogPrompt(skipIndex = false): string {
  const memoryDir = getAutoMemPath()
  // The log path is shown as a pattern, never today's literal path: this
  // prompt is cached by systemPromptSection('memory', ...) and NOT
  // invalidated on date change. The model derives the current date from the
  // date_change attachment (appended at the tail on midnight rollover) rather
  // than the user-context message — the latter is intentionally left stale to
  // preserve the prompt cache prefix across midnight.
  const logPathPattern = join(memoryDir, 'logs', 'YYYY', 'MM', 'YYYY-MM-DD.md')

  const intro: string[] = [
    '# auto memory',
    '',
    `You have a persistent, file-based memory system found at: \`${memoryDir}\``,
    '',
    "This session is long-lived. As you work, record anything worth remembering by **appending** to today's daily log file:",
    '',
    `\`${logPathPattern}\``,
    '',
    "Substitute today's date (from `currentDate` in your context) for `YYYY-MM-DD`. When the date rolls over mid-session, start appending to the new day's file.",
    '',
    'Write each entry as a short timestamped bullet. Create the file (and parent directories) on first write if it does not exist. Do not rewrite or reorganize the log — it is append-only. A separate nightly process distills these logs into `MEMORY.md` and topic files.',
    '',
  ]

  const whatToLog: string[] = [
    '## What to log',
    '- User corrections and preferences ("use bun, not npm"; "stop summarizing diffs")',
    '- Facts about the user, their role, or their goals',
    '- Project context that is not derivable from the code (deadlines, incidents, decisions and their rationale)',
    '- Pointers to external systems (dashboards, Linear projects, Slack channels)',
    '- Anything the user explicitly asks you to remember',
    '',
  ]

  // Explains that MEMORY.md is read-only from the agent's point of view.
  let indexSection: string[] = []
  if (!skipIndex) {
    indexSection = [
      `## ${ENTRYPOINT_NAME}`,
      `\`${ENTRYPOINT_NAME}\` is the distilled index (maintained nightly from your logs) and is loaded into your context automatically. Read it for orientation, but do not edit it directly — record new information in today's log instead.`,
      '',
    ]
  }

  return [
    ...intro,
    ...whatToLog,
    ...WHAT_NOT_TO_SAVE_SECTION,
    '',
    ...indexSection,
    ...buildSearchingPastContextSection(memoryDir),
  ].join('\n')
}
371
/**
 * Quote a path for use as one argument in a POSIX shell command line.
 * Paths made only of characters the shell treats literally are returned
 * unchanged; anything else is wrapped in single quotes, with embedded
 * single quotes escaped as `'\''`.
 */
function quotePathForShell(path: string): string {
  if (/^[A-Za-z0-9_@%+=:,./-]+$/.test(path)) {
    return path
  }
  return `'${path.replace(/'/g, `'\\''`)}'`
}

/**
 * Build the "Searching past context" section if the feature gate is enabled.
 *
 * The section shows the model two searches: one over the `*.md` topic files
 * in the memory directory and one over the `*.jsonl` session transcripts in
 * the project directory. Depending on the build/mode, each search is rendered
 * either as a shell `grep` command or as a Grep-tool invocation. In the shell
 * form, paths are shell-quoted when they contain spaces or other special
 * characters so the suggested command is runnable as written.
 *
 * @param autoMemDir - Memory directory to search for topic files.
 * @returns The section's prompt lines, or an empty array when the
 *   `tengu_coral_fern` gate is off.
 */
export function buildSearchingPastContextSection(autoMemDir: string): string[] {
  if (!getFeatureValue_CACHED_MAY_BE_STALE('tengu_coral_fern', false)) {
    return []
  }
  const projectDir = getProjectDir(getOriginalCwd())
  // Ant-native builds alias grep to embedded ugrep and remove the dedicated
  // Grep tool, so give the model a real shell invocation there.
  // In REPL mode, both Grep and Bash are hidden from direct use — the model
  // calls them from inside REPL scripts, so the grep shell form is what it
  // will write in the script anyway.
  const embedded = hasEmbeddedSearchTools() || isReplModeEnabled()
  const memSearch = embedded
    ? `grep -rn "<search term>" ${quotePathForShell(autoMemDir)} --include="*.md"`
    : `${GREP_TOOL_NAME} with pattern="<search term>" path="${autoMemDir}" glob="*.md"`
  const transcriptSearch = embedded
    ? `grep -rn "<search term>" ${quotePathForShell(`${projectDir}/`)} --include="*.jsonl"`
    : `${GREP_TOOL_NAME} with pattern="<search term>" path="${projectDir}/" glob="*.jsonl"`
  return [
    '## Searching past context',
    '',
    'When looking for past context:',
    '1. Search topic files in your memory directory:',
    '```',
    memSearch,
    '```',
    '2. Session transcript logs (last resort — large files, slow):',
    '```',
    transcriptSearch,
    '```',
    'Use narrow search terms (error messages, file paths, function names) rather than broad keywords.',
    '',
  ]
}
408
/**
 * Load the unified memory prompt for inclusion in the system prompt.
 * Dispatches based on which memory systems are enabled:
 *   - KAIROS active (and auto memory on): assistant daily-log prompt
 *   - auto + team: combined prompt (both directories)
 *   - auto only: memory lines (single directory)
 * Team memory requires auto memory (enforced by isTeamMemoryEnabled), so
 * there is no team-only branch.
 *
 * Side effects: creates the memory directory for the auto/team branches
 * (best effort) and emits analytics events for whichever branch is taken.
 *
 * @returns The prompt text, or null when auto memory is disabled.
 */
export async function loadMemoryPrompt(): Promise<string | null> {
  const autoEnabled = isAutoMemoryEnabled()

  const skipIndex = getFeatureValue_CACHED_MAY_BE_STALE(
    'tengu_moth_copse',
    false,
  )

  // KAIROS daily-log mode takes precedence over TEAMMEM: the append-only
  // log paradigm does not compose with team sync (which expects a shared
  // MEMORY.md that both sides read + write). Gating on `autoEnabled` here
  // means the !autoEnabled case falls through to the tengu_memdir_disabled
  // telemetry block below, matching the non-KAIROS path.
  if (feature('KAIROS') && autoEnabled && getKairosActive()) {
    logMemoryDirCounts(getAutoMemPath(), {
      memory_type:
        'auto' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    return buildAssistantDailyLogPrompt(skipIndex)
  }

  // Cowork injects memory-policy text via env var; thread into all builders.
  const coworkExtraGuidelines =
    process.env.CLAUDE_COWORK_MEMORY_EXTRA_GUIDELINES
  const extraGuidelines =
    coworkExtraGuidelines && coworkExtraGuidelines.trim().length > 0
      ? [coworkExtraGuidelines]
      : undefined

  if (feature('TEAMMEM')) {
    if (teamMemPaths!.isTeamMemoryEnabled()) {
      const autoDir = getAutoMemPath()
      const teamDir = teamMemPaths!.getTeamMemPath()
      // Harness guarantees these directories exist so the model can write
      // without checking. The prompt text reflects this ("already exists").
      // Only creating teamDir is sufficient: getTeamMemPath() is defined as
      // join(getAutoMemPath(), 'team'), so recursive mkdir of the team dir
      // creates the auto dir as a side effect. If the team dir ever moves
      // out from under the auto dir, add a second ensureMemoryDirExists call
      // for autoDir here.
      await ensureMemoryDirExists(teamDir)
      logMemoryDirCounts(autoDir, {
        memory_type:
          'auto' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      })
      logMemoryDirCounts(teamDir, {
        memory_type:
          'team' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
      })
      return teamMemPrompts!.buildCombinedMemoryPrompt(
        extraGuidelines,
        skipIndex,
      )
    }
  }

  if (autoEnabled) {
    const autoDir = getAutoMemPath()
    // Harness guarantees the directory exists so the model can write without
    // checking. The prompt text reflects this ("already exists").
    await ensureMemoryDirExists(autoDir)
    logMemoryDirCounts(autoDir, {
      memory_type:
        'auto' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })
    // Use the shared display-name constant rather than a repeated literal so
    // this heading cannot drift from the name buildMemoryPrompt keys on.
    return buildMemoryLines(
      AUTO_MEM_DISPLAY_NAME,
      autoDir,
      extraGuidelines,
      skipIndex,
    ).join('\n')
  }

  // Auto memory is off: record why. The env var wins over the setting, so
  // `disabled_by_setting` is only reported when the env var did not disable it.
  const disabledByEnvVar = isEnvTruthy(
    process.env.CLAUDE_CODE_DISABLE_AUTO_MEMORY,
  )
  logEvent('tengu_memdir_disabled', {
    disabled_by_env_var: disabledByEnvVar,
    disabled_by_setting:
      !disabledByEnvVar && getInitialSettings().autoMemoryEnabled === false,
  })
  // Gate on the GB flag directly, not isTeamMemoryEnabled() — that function
  // checks isAutoMemoryEnabled() first, which is definitionally false in this
  // branch. We want "was this user in the team-memory cohort at all."
  if (getFeatureValue_CACHED_MAY_BE_STALE('tengu_herring_clock', false)) {
    logEvent('tengu_team_memdir_disabled', {})
  }
  return null
}