source dump of claude code
at main 277 lines 9.1 kB view raw
1import { open, readFile, stat } from 'fs/promises' 2import { 3 applyEdits, 4 modify, 5 parse as parseJsonc, 6} from 'jsonc-parser/lib/esm/main.js' 7import { stripBOM } from './jsonRead.js' 8import { logError } from './log.js' 9import { memoizeWithLRU } from './memoize.js' 10import { jsonStringify } from './slowOperations.js' 11 12type CachedParse = { ok: true; value: unknown } | { ok: false } 13 14// Memoized inner parse. Uses a discriminated-union wrapper because: 15// 1. memoizeWithLRU requires NonNullable<unknown>, but JSON.parse can return 16// null (e.g. JSON.parse("null")). 17// 2. Invalid JSON must also be cached — otherwise repeated calls with the same 18// bad string re-parse and re-log every time (behavioral regression vs the 19// old lodash memoize which wrapped the entire try/catch). 20// Bounded to 50 entries to prevent unbounded memory growth — previously this 21// used lodash memoize which cached every unique JSON string forever (settings, 22// .mcp.json, notebooks, tool results), causing a significant memory leak. 23// Note: shouldLogError is intentionally excluded from the cache key (matching 24// lodash memoize default resolver = first arg only). 25// Skip caching above this size — the LRU stores the full string as the key, 26// so a 200KB config file would pin ~10MB in #keyList across 50 slots. Large 27// inputs like ~/.claude.json also change between reads (numStartups bumps on 28// every CC startup), so the cache never hits anyway. 29const PARSE_CACHE_MAX_KEY_BYTES = 8 * 1024 30 31function parseJSONUncached(json: string, shouldLogError: boolean): CachedParse { 32 try { 33 return { ok: true, value: JSON.parse(stripBOM(json)) } 34 } catch (e) { 35 if (shouldLogError) { 36 logError(e) 37 } 38 return { ok: false } 39 } 40} 41 42const parseJSONCached = memoizeWithLRU(parseJSONUncached, json => json, 50) 43 44// Important: memoized for performance (LRU-bounded to 50 entries, small inputs only). 
/**
 * Safely parse a JSON string, returning null instead of throwing.
 * Small inputs are served from the 50-entry LRU; inputs longer than
 * PARSE_CACHE_MAX_KEY_BYTES bypass the cache (see the note on that constant).
 * The underlying LRU is exposed as `safeParseJSON.cache`.
 * NOTE(review): the size gate compares string .length (UTF-16 code units)
 * against a byte budget — approximate for non-ASCII input; presumably fine
 * for the mostly-ASCII configs this handles. Confirm if that matters.
 * @param json Raw JSON text (BOM tolerated), or null/undefined.
 * @param shouldLogError Whether parse failures are logged (default true).
 *   Not part of the memo key, so a cached result is returned as-is
 *   regardless of this flag.
 * @returns The parsed value, or null for empty/invalid input.
 */
export const safeParseJSON = Object.assign(
  function safeParseJSON(
    json: string | null | undefined,
    shouldLogError: boolean = true,
  ): unknown {
    if (!json) return null
    const result =
      json.length > PARSE_CACHE_MAX_KEY_BYTES
        ? parseJSONUncached(json, shouldLogError)
        : parseJSONCached(json, shouldLogError)
    return result.ok ? result.value : null
  },
  // Expose the LRU cache on the exported function.
  { cache: parseJSONCached.cache },
)

/**
 * Safely parse JSON with comments (jsonc).
 * This is useful for VS Code configuration files like keybindings.json
 * which support comments and other jsonc features.
 * @param json Raw jsonc text, or null/undefined.
 * @returns The parsed value, or null for empty input or on parse error
 *   (the error is logged).
 */
export function safeParseJSONC(json: string | null | undefined): unknown {
  if (!json) {
    return null
  }
  try {
    // Strip BOM before parsing - PowerShell 5.x adds BOM to UTF-8 files
    return parseJsonc(stripBOM(json))
  } catch (e) {
    logError(e)
    return null
  }
}

/**
 * Bun.JSONL.parseChunk if available, false otherwise.
 * Supports both strings and Buffers, minimizing memory usage and copies.
 * Also handles BOM stripping internally.
 */
type BunJSONLParseChunk = (
  data: string | Buffer,
  offset?: number,
) => { values: unknown[]; error: null | Error; read: number; done: boolean }

// Feature-detect Bun's native JSONL parser once at module load. Evaluates to
// false under plain Node (no global Bun) or a Bun build without JSONL.
const bunJSONLParse: BunJSONLParseChunk | false = (() => {
  if (typeof Bun === 'undefined') return false
  const b = Bun as Record<string, unknown>
  const jsonl = b.JSONL as Record<string, unknown> | undefined
  if (!jsonl?.parseChunk) return false
  return jsonl.parseChunk as BunJSONLParseChunk
})()

// Parse JSONL with Bun's native parser. If a malformed line aborts parsing
// mid-stream, resynchronize at the next newline and continue, so one bad line
// doesn't discard the rest of the input (matching the fallback parsers below,
// which skip malformed lines individually).
// NOTE(review): assumes the `read` field returned by parseChunk is an
// absolute offset into `data` — confirm against Bun's JSONL documentation.
function parseJSONLBun<T>(data: string | Buffer): T[] {
  const parse = bunJSONLParse as BunJSONLParseChunk
  const len = data.length
  const result = parse(data)
  // Clean parse, or everything was consumed: return the values directly.
  if (!result.error || result.done || result.read >= len) {
    return result.values as T[]
  }
  // Had an error mid-stream — collect what we got and keep going
  let values = result.values as T[]
  let offset = result.read
  while (offset < len) {
    // Resync: advance to the start of the line after the failure point.
    const newlineIndex =
      typeof data === 'string'
        ? data.indexOf('\n', offset)
        : data.indexOf(0x0a, offset)
    if (newlineIndex === -1) break
    offset = newlineIndex + 1
    const next = parse(data, offset)
    if (next.values.length > 0) {
      values = values.concat(next.values as T[])
    }
    if (!next.error || next.done || next.read >= len) break
    offset = next.read
  }
  return values
}

// Fallback JSONL parse over a raw Buffer: split on 0x0A newlines, JSON.parse
// each non-empty line, and silently skip lines that fail to parse. Decodes
// one line at a time rather than materializing the whole buffer as a string.
function parseJSONLBuffer<T>(buf: Buffer): T[] {
  const bufLen = buf.length
  let start = 0

  // Strip UTF-8 BOM (EF BB BF)
  if (buf[0] === 0xef && buf[1] === 0xbb && buf[2] === 0xbf) {
    start = 3
  }

  const results: T[] = []
  while (start < bufLen) {
    let end = buf.indexOf(0x0a, start)
    if (end === -1) end = bufLen

    const line = buf.toString('utf8', start, end).trim()
    start = end + 1
    if (!line) continue
    try {
      results.push(JSON.parse(line) as T)
    } catch {
      // Skip malformed lines
    }
  }
  return results
}

// Fallback JSONL parse over a string: same line-by-line strategy as
// parseJSONLBuffer, with BOM stripping up front.
function parseJSONLString<T>(data: string): T[] {
  const stripped = stripBOM(data)
  const len = stripped.length
  let start = 0

  const results: T[] = []
  while (start < len) {
    let end = stripped.indexOf('\n', start)
    if (end === -1) end = len

    const line = stripped.substring(start, end).trim()
    start = end + 1
    if (!line) continue
    try {
      results.push(JSON.parse(line) as T)
    } catch {
      // Skip malformed lines
    }
  }
  return results
}

/**
 * Parses JSONL data from a string or Buffer, skipping malformed lines.
 * Uses Bun.JSONL.parseChunk when available for better performance,
 * falls back to indexOf-based scanning otherwise.
 * @param data JSONL content (optionally BOM-prefixed).
 * @returns Parsed values, one per valid line; malformed lines are dropped.
 */
export function parseJSONL<T>(data: string | Buffer): T[] {
  if (bunJSONLParse) {
    return parseJSONLBun<T>(data)
  }
  if (typeof data === 'string') {
    return parseJSONLString<T>(data)
  }
  return parseJSONLBuffer<T>(data)
}

// Upper bound on how much of a JSONL file readJSONLFile will read; larger
// files are tail-read (see readJSONLFile below).
const MAX_JSONL_READ_BYTES = 100 * 1024 * 1024

/**
 * Reads and parses a JSONL file, reading at most the last 100 MB.
 * For files larger than 100 MB, reads the tail and skips the first partial line.
 *
 * 100 MB is more than sufficient since the longest context window we support
 * is ~2M tokens, which is well under 100 MB of JSONL.
 * @param filePath Path to the JSONL file.
 * @returns Parsed values from (the tail of) the file.
 */
export async function readJSONLFile<T>(filePath: string): Promise<T[]> {
  const { size } = await stat(filePath)
  if (size <= MAX_JSONL_READ_BYTES) {
    return parseJSONL<T>(await readFile(filePath))
  }
  // File exceeds the cap: read only the trailing MAX_JSONL_READ_BYTES.
  // `await using` (explicit resource management) closes the handle on scope
  // exit, including on throw.
  await using fd = await open(filePath, 'r')
  const buf = Buffer.allocUnsafe(MAX_JSONL_READ_BYTES)
  let totalRead = 0
  const fileOffset = size - MAX_JSONL_READ_BYTES
  // A single read may return fewer bytes than requested; loop until the
  // buffer is full or EOF (bytesRead === 0).
  while (totalRead < MAX_JSONL_READ_BYTES) {
    const { bytesRead } = await fd.read(
      buf,
      totalRead,
      MAX_JSONL_READ_BYTES - totalRead,
      fileOffset + totalRead,
    )
    if (bytesRead === 0) break
    totalRead += bytesRead
  }
  // Skip the first partial line
  const newlineIndex = buf.indexOf(0x0a)
  if (newlineIndex !== -1 && newlineIndex < totalRead - 1) {
    return parseJSONL<T>(buf.subarray(newlineIndex + 1, totalRead))
  }
  // No usable newline in the read region: parse everything. A truncated
  // first line normally fails JSON.parse and is skipped by parseJSONL.
  // NOTE(review): a truncated line that still parses (e.g. a bare number cut
  // short) would slip through here.
  return parseJSONL<T>(buf.subarray(0, totalRead))
}

/**
 * Modify a jsonc string by adding a new item to an array, preserving comments and formatting.
 * @param content The jsonc string to modify
 * @param newItem The new item to add to the array
 * @returns The modified jsonc string
 */
export function addItemToJSONCArray(content: string, newItem: unknown): string {
  try {
    // If the content is empty or whitespace, create a new JSON file
    if (!content || content.trim() === '') {
      return jsonStringify([newItem], null, 4)
    }

    // Strip BOM before parsing - PowerShell 5.x adds BOM to UTF-8 files
    const cleanContent = stripBOM(content)

    // Parse the content to check if it's valid JSON
    const parsedContent = parseJsonc(cleanContent)

    // If the parsed content is a valid array, modify it
    if (Array.isArray(parsedContent)) {
      // Get the length of the array
      const arrayLength = parsedContent.length

      // Determine if we are dealing with an empty array
      const isEmpty = arrayLength === 0

      // If it's an empty array we want to add at index 0, otherwise append to the end
      const insertPath = isEmpty ? [0] : [arrayLength]

      // Generate edits - we're using isArrayInsertion to add a new item without overwriting existing ones
      const edits = modify(cleanContent, insertPath, newItem, {
        formattingOptions: { insertSpaces: true, tabSize: 4 },
        isArrayInsertion: true,
      })

      // If edits could not be generated, fall back to manual JSON string manipulation
      if (!edits || edits.length === 0) {
        const copy = [...parsedContent, newItem]
        return jsonStringify(copy, null, 4)
      }

      // Apply the edits to preserve comments (use cleanContent without BOM)
      return applyEdits(cleanContent, edits)
    }
    // If it's not an array at all, create a new array with the item
    else {
      // If the content exists but is not an array, we'll replace it completely
      return jsonStringify([newItem], null, 4)
    }
  } catch (e) {
    // If parsing fails for any reason, log the error and fallback to creating a new JSON array
    logError(e)
    return jsonStringify([newItem], null, 4)
  }
}