⚡ Zero-dependency plcbundle library exclusively for Bun

flush-pds example

+2 -1
.gitignore
··· 1 node_modules 2 test 3 dist 4 - test-data
··· 1 node_modules 2 test 3 dist 4 + test-data 5 + pds_list
+12
examples/flush-pds.ts
···
··· 1 + // flushes all pds endpoints 2 + 3 + const unique: Array<string> = [] 4 + 5 + export function process({ op }: { op: any }) { 6 + 7 + const endpoint = op.operation.services?.atproto_pds?.endpoint 8 + if (!unique.includes(endpoint)) { 9 + console.log(endpoint) 10 + unique.push(endpoint) 11 + } 12 + }
+1 -1
src/cli.ts
··· 2 3 import { clone } from './cmds/clone'; 4 import { detect } from './cmds/detect'; 5 - import { process as processCmd } from './cmds/process'; 6 import { info } from './cmds/info'; 7 import { verify } from './cmds/verify'; 8 import { exportCmd } from './cmds/export';
··· 2 3 import { clone } from './cmds/clone'; 4 import { detect } from './cmds/detect'; 5 + import { processCmd } from './cmds/process'; 6 import { info } from './cmds/info'; 7 import { verify } from './cmds/verify'; 8 import { exportCmd } from './cmds/export';
+5 -3
src/cmds/common.ts
··· 11 silent: boolean; 12 flush?: boolean; 13 mode: 'detect' | 'process'; 14 onMatch?: (match: any, matchCount: number, matchedBytes: number) => void; 15 } 16 ··· 18 * Common processing logic for both detect and process commands 19 */ 20 export async function processOperations(options: ProcessingOptions) { 21 - const { dir, start, end, modulePath, threads, silent, flush, mode, onMatch } = options; 22 23 const bundle = new PLCBundle(dir); 24 ··· 58 threads, 59 silent, 60 flush, 61 - onProgress: (progressStats: ProcessStats) => { 62 const elapsed = (Date.now() - startTime) / 1000; 63 const opsPerSec = (progressStats.totalOps / elapsed).toFixed(0); 64 const mbPerSec = (progressStats.totalBytes / elapsed / 1e6).toFixed(1); ··· 129 } 130 }, 131 { 132 - onProgress: (progressStats: ProcessStats) => { 133 const elapsed = (Date.now() - startTime) / 1000; 134 const opsPerSec = (progressStats.totalOps / elapsed).toFixed(0); 135 const mbPerSec = (progressStats.totalBytes / elapsed / 1e6).toFixed(1); ··· 182 silent: { type: 'boolean', default: false }, 183 s: { type: 'boolean', default: false }, 184 flush: { type: 'boolean', default: false }, 185 }, 186 strict: false, 187 allowPositionals: true,
··· 11 silent: boolean; 12 flush?: boolean; 13 mode: 'detect' | 'process'; 14 + noProgress?: boolean; 15 onMatch?: (match: any, matchCount: number, matchedBytes: number) => void; 16 } 17 ··· 19 * Common processing logic for both detect and process commands 20 */ 21 export async function processOperations(options: ProcessingOptions) { 22 + const { dir, start, end, modulePath, threads, silent, flush, mode, noProgress, onMatch } = options; 23 24 const bundle = new PLCBundle(dir); 25 ··· 59 threads, 60 silent, 61 flush, 62 + onProgress: noProgress ? undefined : (progressStats: ProcessStats) => { // Check noProgress 63 const elapsed = (Date.now() - startTime) / 1000; 64 const opsPerSec = (progressStats.totalOps / elapsed).toFixed(0); 65 const mbPerSec = (progressStats.totalBytes / elapsed / 1e6).toFixed(1); ··· 130 } 131 }, 132 { 133 + onProgress: noProgress ? undefined : (progressStats: ProcessStats) => { // Check noProgress 134 const elapsed = (Date.now() - startTime) / 1000; 135 const opsPerSec = (progressStats.totalOps / elapsed).toFixed(0); 136 const mbPerSec = (progressStats.totalBytes / elapsed / 1e6).toFixed(1); ··· 183 silent: { type: 'boolean', default: false }, 184 s: { type: 'boolean', default: false }, 185 flush: { type: 'boolean', default: false }, 186 + 'no-progress': { type: 'boolean', default: false }, // Add this 187 }, 188 strict: false, 189 allowPositionals: true,
+4 -1
src/cmds/detect.ts
··· 18 --threads <num> Number of worker threads (default: 1) 19 --flush Output matches immediately (unsorted) 20 -s, --silent Suppress all console output from detect script 21 22 EXAMPLES: 23 plcbundle-bun detect ./detect.ts 24 plcbundle-bun detect ./detect.ts --bundles 1-50 --threads 4 25 - plcbundle-bun detect ./detect.ts --flush --silent 26 `); 27 return; 28 } ··· 40 const threads = parseInt((values.threads as string) || '1'); 41 const silent = Boolean(values.silent || values.s); 42 const flush = Boolean(values.flush); 43 44 const bundle = new PLCBundle(dir); 45 const { start, end } = await parseBundleSelection(values, bundle); ··· 53 threads, 54 silent, 55 flush, 56 mode: 'detect', 57 onMatch: (match) => { 58 console.log(`${match.bundle},${match.position},${match.cid},${match.size},0.95,${match.labels.join(';')}`);
··· 18 --threads <num> Number of worker threads (default: 1) 19 --flush Output matches immediately (unsorted) 20 -s, --silent Suppress all console output from detect script 21 + --no-progress Disable progress output (default: false) 22 23 EXAMPLES: 24 plcbundle-bun detect ./detect.ts 25 plcbundle-bun detect ./detect.ts --bundles 1-50 --threads 4 26 + plcbundle-bun detect ./detect.ts --flush --silent --no-progress 27 `); 28 return; 29 } ··· 41 const threads = parseInt((values.threads as string) || '1'); 42 const silent = Boolean(values.silent || values.s); 43 const flush = Boolean(values.flush); 44 + const noProgress = Boolean(values['no-progress']); // Add this 45 46 const bundle = new PLCBundle(dir); 47 const { start, end } = await parseBundleSelection(values, bundle); ··· 55 threads, 56 silent, 57 flush, 58 + noProgress, // Pass it 59 mode: 'detect', 60 onMatch: (match) => { 61 console.log(`${match.bundle},${match.position},${match.cid},${match.size},0.95,${match.labels.join(';')}`);
+6 -1
src/cmds/process.ts
··· 1 import { parseProcessArgs, parseBundleSelection, processOperations } from './common'; 2 import { PLCBundle } from '../plcbundle'; 3 4 - export async function process(args: string[]) { 5 if (args.includes('-h') || args.includes('--help')) { 6 console.log(` 7 process - Process operations with a custom function ··· 17 --bundles <spec> Bundle selection: number (42) or range (1-50) 18 --threads <num> Number of worker threads (default: 1) 19 -s, --silent Suppress all console output from process script 20 21 EXAMPLES: 22 plcbundle-bun process ./my-processor.ts 23 plcbundle-bun process ./my-processor.ts --bundles 1-50 --threads 4 24 25 PROCESS FUNCTION: 26 export function process({ op, position, bundle, line }) { ··· 42 const dir = (values.dir as string) || './'; 43 const threads = parseInt((values.threads as string) || '1'); 44 const silent = Boolean(values.silent || values.s); 45 46 const bundle = new PLCBundle(dir); 47 const { start, end } = await parseBundleSelection(values, bundle); ··· 54 modulePath: resolvedPath, 55 threads, 56 silent, 57 mode: 'process', 58 }); 59 }
··· 1 + import { exit } from 'process'; 2 import { parseProcessArgs, parseBundleSelection, processOperations } from './common'; 3 import { PLCBundle } from '../plcbundle'; 4 5 + export async function processCmd(args: string[]) { 6 if (args.includes('-h') || args.includes('--help')) { 7 console.log(` 8 process - Process operations with a custom function ··· 18 --bundles <spec> Bundle selection: number (42) or range (1-50) 19 --threads <num> Number of worker threads (default: 1) 20 -s, --silent Suppress all console output from process script 21 + --no-progress Disable progress output (default: false) 22 23 EXAMPLES: 24 plcbundle-bun process ./my-processor.ts 25 plcbundle-bun process ./my-processor.ts --bundles 1-50 --threads 4 26 + plcbundle-bun process ./my-processor.ts --no-progress 27 28 PROCESS FUNCTION: 29 export function process({ op, position, bundle, line }) { ··· 45 const dir = (values.dir as string) || './'; 46 const threads = parseInt((values.threads as string) || '1'); 47 const silent = Boolean(values.silent || values.s); 48 + const noProgress = Boolean(values['no-progress']); // Add this 49 50 const bundle = new PLCBundle(dir); 51 const { start, end } = await parseBundleSelection(values, bundle); ··· 58 modulePath: resolvedPath, 59 threads, 60 silent, 61 + noProgress, // Pass it 62 mode: 'process', 63 }); 64 }
+39 -7
src/plcbundle.ts
··· 307 throw new Error('Multi-threading requires module path. Use: processBundles(start, end, { module: "./detect.ts", threads: 4 })'); 308 } 309 310 // Use workers for multi-threading with module 311 if (threads > 1 && module) { 312 - return await this.processBundlesWorkers(start, end, module, threads, silent, flush, onProgress, onMatch); 313 } 314 315 // Load module if provided but single-threaded 316 if (module && !callback) { 317 - const resolvedPath = Bun.resolveSync(module, process.cwd()); 318 const mod = await import(resolvedPath); 319 - const detectFn = mod.detect || mod.default; 320 321 - callback = (op) => { 322 - detectFn({ op }); 323 }; 324 } 325 ··· 341 threads: number, 342 silent: boolean, 343 flush: boolean, 344 onProgress?: (stats: ProcessStats) => void, 345 onMatch?: (match: any) => void 346 ): Promise<ProcessStats & { matches?: any[] }> { ··· 406 modulePath, 407 silent, 408 flush, 409 }); 410 } 411 ··· 428 } 429 430 // Sort matches if not flushed 431 - if (!flush) { 432 allMatches.sort((a, b) => { 433 if (a.bundle !== b.bundle) return a.bundle - b.bundle; 434 return a.position - b.position; ··· 440 matchCount: 0, 441 totalBytes, 442 matchedBytes: 0, 443 - matches: flush ? undefined : allMatches, 444 }; 445 } 446
··· 307 throw new Error('Multi-threading requires module path. Use: processBundles(start, end, { module: "./detect.ts", threads: 4 })'); 308 } 309 310 + // Determine mode based on what function is exported 311 + let mode: 'detect' | 'process' = 'detect'; 312 + if (module) { 313 + try { 314 + const mod = await import(module); 315 + // If module has 'process' function, use process mode 316 + if (mod.process) { 317 + mode = 'process'; 318 + } else if (mod.detect) { 319 + mode = 'detect'; 320 + } 321 + } catch (e) { 322 + // Default to detect 323 + } 324 + } 325 + 326 // Use workers for multi-threading with module 327 if (threads > 1 && module) { 328 + return await this.processBundlesWorkers( 329 + start, 330 + end, 331 + module, 332 + threads, 333 + silent, 334 + flush, 335 + mode, // Pass mode 336 + onProgress, 337 + onMatch 338 + ); 339 } 340 341 // Load module if provided but single-threaded 342 if (module && !callback) { 343 + const resolvedPath = module; 344 const mod = await import(resolvedPath); 345 + const userFn = mode === 'detect' ? (mod.detect || mod.default) : (mod.process || mod.default); 346 347 + callback = (op, position, bundleNum, line) => { 348 + if (mode === 'detect') { 349 + userFn({ op }); 350 + } else { 351 + userFn({ op, position, bundle: bundleNum, line }); 352 + } 353 }; 354 } 355 ··· 371 threads: number, 372 silent: boolean, 373 flush: boolean, 374 + mode: 'detect' | 'process', // Add mode parameter 375 onProgress?: (stats: ProcessStats) => void, 376 onMatch?: (match: any) => void 377 ): Promise<ProcessStats & { matches?: any[] }> { ··· 437 modulePath, 438 silent, 439 flush, 440 + mode, // Pass mode to worker 441 }); 442 } 443 ··· 460 } 461 462 // Sort matches if not flushed 463 + if (!flush && mode === 'detect') { 464 allMatches.sort((a, b) => { 465 if (a.bundle !== b.bundle) return a.bundle - b.bundle; 466 return a.position - b.position; ··· 472 matchCount: 0, 473 totalBytes, 474 matchedBytes: 0, 475 + matches: flush || mode === 'process' ? undefined : allMatches, 476 }; 477 } 478
+32 -22
src/worker.ts
··· 7 modulePath: string; 8 silent?: boolean; 9 flush?: boolean; 10 } 11 12 export interface WorkerProgress { ··· 38 } 39 40 self.onmessage = async (event: MessageEvent<WorkerTask>) => { 41 - const { dir, start, end, modulePath, silent, flush } = event.data; 42 43 // Override console if silent 44 if (silent) { ··· 52 } as any; 53 } 54 55 - // Load detect function 56 const mod = await import(modulePath); 57 - const detectFn = mod.detect || mod.default; 58 59 let totalOps = 0; 60 let totalBytes = 0; ··· 78 totalBytes += line.length; 79 80 const op = JSON.parse(line); 81 - const labels = detectFn({ op }); 82 83 - if (labels && labels.length > 0) { 84 - const match = { 85 - bundle: bundleNum, 86 - position, 87 - cid: op.cid.slice(-4), 88 - size: line.length, 89 - labels, 90 - }; 91 92 - if (flush) { 93 - // Send match immediately 94 - self.postMessage({ 95 - type: 'match', 96 - ...match, 97 - } as WorkerMatch); 98 - } else { 99 - // Buffer matches 100 - matches.push(match); 101 } 102 } 103 104 if (totalOps % 10000 === 0) { ··· 121 totalBytes, 122 matches: flush ? [] : matches, 123 } as WorkerResult); 124 - };
··· 7 modulePath: string; 8 silent?: boolean; 9 flush?: boolean; 10 + mode?: 'detect' | 'process'; // Add mode parameter 11 } 12 13 export interface WorkerProgress { ··· 39 } 40 41 self.onmessage = async (event: MessageEvent<WorkerTask>) => { 42 + const { dir, start, end, modulePath, silent, flush, mode = 'detect' } = event.data; 43 44 // Override console if silent 45 if (silent) { ··· 53 } as any; 54 } 55 56 + // Load the appropriate function based on mode 57 const mod = await import(modulePath); 58 + const userFn = mode === 'detect' 59 + ? (mod.detect || mod.default) 60 + : (mod.process || mod.default); 61 62 let totalOps = 0; 63 let totalBytes = 0; ··· 81 totalBytes += line.length; 82 83 const op = JSON.parse(line); 84 85 + if (mode === 'detect') { 86 + // Detection mode - look for labels 87 + const labels = userFn({ op }); 88 89 + if (labels && labels.length > 0) { 90 + const match = { 91 + bundle: bundleNum, 92 + position, 93 + cid: op.cid.slice(-4), 94 + size: line.length, 95 + labels, 96 + }; 97 + 98 + if (flush) { 99 + // Send match immediately 100 + self.postMessage({ 101 + type: 'match', 102 + ...match, 103 + } as WorkerMatch); 104 + } else { 105 + // Buffer matches 106 + matches.push(match); 107 + } 108 } 109 + } else { 110 + // Process mode - just call the function 111 + userFn({ op, position, bundle: bundleNum, line }); 112 } 113 114 if (totalOps % 10000 === 0) { ··· 131 totalBytes, 132 matches: flush ? [] : matches, 133 } as WorkerResult); 134 + };
+8 -9
tests/commands.test.ts
··· 1 import { describe, test, expect, beforeEach } from 'bun:test'; 2 import { PLCBundle } from '../src/plcbundle'; 3 import { TEMP_DIR, createMockIndex, createMockOperations } from './setup'; 4 - import { resolve } from 'path'; 5 6 describe('CLI Commands', () => { 7 let detectModulePath: string; ··· 20 await Bun.write(bundle.getBundlePath(i), compressed); 21 } 22 23 - // Create test modules with Bun.resolveSync 24 - detectModulePath = Bun.resolveSync(`${TEMP_DIR}/test-detect.ts`, process.cwd()); 25 await Bun.write(detectModulePath, ` 26 - export function detect({ op }) { 27 return op.did.includes('test') ? ['test'] : []; 28 - } 29 `); 30 31 - processModulePath = Bun.resolveSync(`${TEMP_DIR}/test-process.ts`, process.cwd()); 32 await Bun.write(processModulePath, ` 33 - let count = 0; 34 - export function process({ op }) { 35 count++; 36 - } 37 `); 38 }); 39
··· 1 import { describe, test, expect, beforeEach } from 'bun:test'; 2 import { PLCBundle } from '../src/plcbundle'; 3 import { TEMP_DIR, createMockIndex, createMockOperations } from './setup'; 4 5 describe('CLI Commands', () => { 6 let detectModulePath: string; ··· 19 await Bun.write(bundle.getBundlePath(i), compressed); 20 } 21 22 + // Create test modules - use absolute paths 23 + detectModulePath = `${process.cwd()}/${TEMP_DIR}/test-detect.ts`; 24 await Bun.write(detectModulePath, ` 25 + export function detect({ op }) { 26 return op.did.includes('test') ? ['test'] : []; 27 + } 28 `); 29 30 + processModulePath = `${process.cwd()}/${TEMP_DIR}/test-process.ts`; 31 await Bun.write(processModulePath, ` 32 + let count = 0; 33 + export function process({ op }) { 34 count++; 35 + } 36 `); 37 }); 38
+2 -2
tests/multithread.test.ts
··· 20 await Bun.write(bundle.getBundlePath(i), compressed); 21 } 22 23 - // Create test module with Bun.resolveSync 24 - modulePath = Bun.resolveSync(`${TEMP_DIR}/test-module.ts`, process.cwd()); 25 await Bun.write(modulePath, ` 26 export function detect({ op }) { 27 return op.did.length > 10 ? ['long-did'] : [];
··· 20 await Bun.write(bundle.getBundlePath(i), compressed); 21 } 22 23 + // Create test module - use absolute path 24 + modulePath = `${process.cwd()}/${TEMP_DIR}/test-module.ts`; 25 await Bun.write(modulePath, ` 26 export function detect({ op }) { 27 return op.did.length > 10 ? ['long-did'] : [];
+3 -3
tests/processing.test.ts
··· 196 197 describe('processBundles with module path', () => { 198 test('loads module and calls function', async () => { 199 - // Create a test module 200 - const testModulePath = Bun.resolveSync(`${TEMP_DIR}/test-module.ts`, process.cwd()); 201 await Bun.write(testModulePath, ` 202 export function detect({ op }) { 203 return op.did.startsWith('did:plc:') ? ['test'] : []; ··· 212 }); 213 214 test('supports silent mode', async () => { 215 - // Create absolute path directly (file doesn't exist yet to resolve) 216 const testModulePath = `${process.cwd()}/${TEMP_DIR}/noisy-module.ts`; 217 await Bun.write(testModulePath, ` 218 export function detect({ op }) {
··· 196 197 describe('processBundles with module path', () => { 198 test('loads module and calls function', async () => { 199 + // Create a test module with absolute path 200 + const testModulePath = `${process.cwd()}/${TEMP_DIR}/test-module.ts`; 201 await Bun.write(testModulePath, ` 202 export function detect({ op }) { 203 return op.did.startsWith('did:plc:') ? ['test'] : []; ··· 212 }); 213 214 test('supports silent mode', async () => { 215 + // Create absolute path directly 216 const testModulePath = `${process.cwd()}/${TEMP_DIR}/noisy-module.ts`; 217 await Bun.write(testModulePath, ` 218 export function detect({ op }) {