⚡ Zero-dependency plcbundle library exclusively for Bun

better processBundles & process command

+2 -1
examples/analyze.ts
··· 19 19 withPds: 0, 20 20 }; 21 21 22 - await bundle.processBundles(1, 10, (op) => { 22 + await bundle.processBundles(1, 10, (op, position, bundleNum, line) => { 23 23 stats.totalOps++; 24 24 stats.uniqueDids.add(op.did); 25 25 ··· 37 37 stats.withPds++; 38 38 } 39 39 }); 40 + 40 41 41 42 console.log('📊 Analysis Results\n'); 42 43 console.log(`Total operations: ${stats.totalOps.toLocaleString()}`);
+11
examples/detect-orig.js
··· 1 + export function detect({ op }) { 2 + const labels = []; 3 + 4 + if (op.did.startsWith('did:plc:aa')) { 5 + labels.push('test') 6 + } 7 + 8 + console.log(op.operation.sig) 9 + 10 + return labels; 11 + }
+2 -2
examples/detect.ts
··· 19 19 } 20 20 21 21 // Example: Detect accounts created in 2024 22 - if (op.createdAt?.startsWith('2024')) { 23 - labels.push('created-2024'); 22 + if (op.createdAt?.match(/^\d{4}\-(03|06)/)) { 23 + labels.push('created-march-june'); 24 24 } 25 25 26 26 return labels;
+31
examples/info.ts
··· 1 + /** 2 + * Example: Get repository information 3 + * 4 + * Usage: 5 + * bun examples/info.ts 6 + */ 7 + 8 + import { PLCBundle } from '../src'; 9 + 10 + const bundle = new PLCBundle('./data/bundles'); 11 + 12 + const stats = await bundle.getStats(); 13 + 14 + console.log('📦 Repository Information\n'); 15 + console.log(`Version: ${stats.version}`); 16 + console.log(`Last bundle: ${stats.lastBundle}`); 17 + console.log(`Total bundles: ${stats.totalBundles}`); 18 + console.log(`Total size: ${(stats.totalSize / 1e9).toFixed(2)} GB`); 19 + console.log(`Updated: ${stats.updatedAt}`); 20 + 21 + console.log('\n📊 Sample Bundle Info\n'); 22 + 23 + const metadata = await bundle.getMetadata(1); 24 + if (metadata) { 25 + console.log(`Bundle #${metadata.bundle_number}`); 26 + console.log(` Operations: ${metadata.operation_count.toLocaleString()}`); 27 + console.log(` Unique DIDs: ${metadata.did_count.toLocaleString()}`); 28 + console.log(` Time range: ${metadata.start_time} → ${metadata.end_time}`); 29 + console.log(` Size: ${(metadata.compressed_size / 1e6).toFixed(2)} MB (compressed)`); 30 + console.log(` Size: ${(metadata.uncompressed_size / 1e6).toFixed(2)} MB (uncompressed)`); 31 + }
-32
examples/library-use.js
··· 1 - import { PLCBundle } from './src'; 2 - 3 - // Create bundle reader 4 - const plcbundle = new PLCBundle('./bundles'); 5 - 6 - // Get stats 7 - const stats = await plcbundle.getStats(); 8 - console.log(stats); 9 - 10 - // Stream operations 11 - for await (const op of plcbundle.streamOperations(1)) { 12 - console.log(op.did); 13 - } 14 - 15 - await bundle.processBundles(1, 100, (op, pos, num) => { 16 - // Your logic 17 - }, { 18 - threads: 4, 19 - onProgress: (stats) => { 20 - console.log(`Processed ${stats.totalOps} ops`); 21 - } 22 - }); 23 - 24 - // Clone with progress 25 - await bundle.clone('https://plcbundle.atscan.net', { 26 - threads: 8, 27 - bundles: '1-100', 28 - verify: true, 29 - onProgress: (stats) => { 30 - console.log(`Downloaded ${stats.downloadedBundles}/${stats.totalBundles}`); 31 - } 32 - });
+1 -1
examples/parallel.ts
··· 18 18 19 19 const startTime = Date.now(); 20 20 21 - await bundle.processBundles(1, 50, (op) => { 21 + await bundle.processBundles(1, 50, (op, position, bundleNum, line) => { 22 22 if (op.operation?.alsoKnownAs?.length > 0) { 23 23 matches.withHandle++; 24 24 }
+28
examples/processor.ts
··· 1 + /** 2 + * Example process function for general operation processing 3 + * 4 + * Usage: 5 + * plcbundle-bun process ./examples/processor.ts 6 + */ 7 + 8 + export function process({ op, position, bundle, line }: { 9 + op: any; 10 + position: number; 11 + bundle: number; 12 + line: string; 13 + }) { 14 + // Example: Count operations by year 15 + const year = op.createdAt.substring(0, 4); 16 + 17 + // Example: Log specific operations 18 + if (position % 1000 === 0) { 19 + console.log(`Bundle ${bundle}, position ${position}: ${op.did}`); 20 + } 21 + 22 + // Example: Custom logic 23 + if (op.operation?.alsoKnownAs?.length > 0) { 24 + // Do something with operations that have handles 25 + } 26 + 27 + // No need to return anything 28 + }
+5 -3
src/cli.ts
··· 2 2 3 3 import { clone } from './cmds/clone'; 4 4 import { detect } from './cmds/detect'; 5 + import { process as processCmd } from './cmds/process'; 5 6 import { info } from './cmds/info'; 6 7 import { verify } from './cmds/verify'; 7 8 import { exportCmd } from './cmds/export'; ··· 9 10 const commands = { 10 11 clone, 11 12 detect, 13 + process: processCmd, 12 14 info, 13 15 verify, 14 16 export: exportCmd, ··· 23 25 COMMANDS: 24 26 clone Clone bundles from a remote repository 25 27 detect Detect and filter operations using a custom function 28 + process Process operations with a custom function 26 29 info Show index or bundle information 27 30 verify Verify bundle integrity 28 31 export Export operations from bundle ··· 32 35 33 36 EXAMPLES: 34 37 bun cli clone --remote https://plcbundle.atscan.net 38 + bun cli detect ./examples/detect.ts --bundles 1-100 39 + bun cli process ./my-processor.ts --threads 4 35 40 bun cli info --dir ./bundles 36 - bun cli detect --detect ./examples/detect.ts 37 - bun cli detect --detect ./examples/detect.ts --bundles 1-100 38 - bun cli detect --detect ./examples/detect.ts --bundles 42 --threads 4 39 41 bun cli verify --bundle 42 40 42 bun cli export --bundle 1 > ops.jsonl 41 43 `);
+233
src/cmds/common.ts
··· 1 + import { PLCBundle } from '../plcbundle'; 2 + import type { ProcessStats } from '../types'; 3 + import { parseArgs } from 'util'; 4 + 5 + export interface ProcessingOptions { 6 + dir: string; 7 + start: number; 8 + end: number; 9 + modulePath: string; 10 + threads: number; 11 + silent: boolean; 12 + flush?: boolean; 13 + mode: 'detect' | 'process'; 14 + onMatch?: (match: any, matchCount: number, matchedBytes: number) => void; 15 + } 16 + 17 + /** 18 + * Common processing logic for both detect and process commands 19 + */ 20 + export async function processOperations(options: ProcessingOptions) { 21 + const { dir, start, end, modulePath, threads, silent, flush, mode, onMatch } = options; 22 + 23 + const bundle = new PLCBundle(dir); 24 + 25 + // Save original console 26 + const originalConsole = { 27 + log: console.log, 28 + error: console.error, 29 + warn: console.warn, 30 + info: console.info, 31 + debug: console.debug, 32 + }; 33 + 34 + // Override console if silent 35 + if (silent) { 36 + console.log = () => {}; 37 + console.error = () => {}; 38 + console.warn = () => {}; 39 + console.info = () => {}; 40 + console.debug = () => {}; 41 + } 42 + 43 + try { 44 + originalConsole.error(`Processing bundles ${start}-${end} from ${dir}${threads > 1 ? ` (${threads} threads)` : ''}${silent ? ' (silent)' : ''}\n`); 45 + 46 + if (mode === 'detect') { 47 + originalConsole.log('bundle,position,cid,size,confidence,labels'); 48 + } 49 + 50 + const startTime = Date.now(); 51 + let matchCount = 0; 52 + let matchedBytes = 0; 53 + 54 + if (threads > 1) { 55 + // Multi-threaded mode 56 + const result: any = await bundle.processBundles(start, end, { 57 + module: modulePath, 58 + threads, 59 + silent, 60 + flush, 61 + onProgress: (progressStats: ProcessStats) => { 62 + const elapsed = (Date.now() - startTime) / 1000; 63 + const opsPerSec = (progressStats.totalOps / elapsed).toFixed(0); 64 + const mbPerSec = (progressStats.totalBytes / elapsed / 1e6).toFixed(1); 65 + originalConsole.error(`Processed ${progressStats.totalOps} ops | ${opsPerSec} ops/sec | ${mbPerSec} MB/s\r`); 66 + }, 67 + onMatch: flush && onMatch ? (match) => { 68 + matchCount++; 69 + matchedBytes += match.size; 70 + onMatch(match, matchCount, matchedBytes); 71 + } : undefined, 72 + }); 73 + 74 + // Output buffered matches (if not flushed) 75 + if (!flush && result.matches && onMatch) { 76 + for (const match of result.matches) { 77 + matchCount++; 78 + matchedBytes += match.size; 79 + onMatch(match, matchCount, matchedBytes); 80 + } 81 + } 82 + 83 + const elapsed = (Date.now() - startTime) / 1000; 84 + const opsPerSec = (result.totalOps / elapsed).toFixed(0); 85 + const mbPerSec = (result.totalBytes / elapsed / 1e6).toFixed(1); 86 + 87 + originalConsole.error('\n'); 88 + originalConsole.error(`✓ ${mode === 'detect' ? 'Detection' : 'Processing'} complete`); 89 + originalConsole.error(` Total operations: ${result.totalOps.toLocaleString()}`); 90 + 91 + if (mode === 'detect') { 92 + originalConsole.error(` Matches found: ${matchCount.toLocaleString()} (${(matchCount/result.totalOps*100).toFixed(2)}%)`); 93 + originalConsole.error(` Matched size: ${(matchedBytes / 1e6).toFixed(1)} MB (${(matchedBytes/result.totalBytes*100).toFixed(2)}%)`); 94 + } 95 + 96 + originalConsole.error(` Total size: ${(result.totalBytes / 1e6).toFixed(1)} MB`); 97 + originalConsole.error(''); 98 + originalConsole.error(` Time elapsed: ${elapsed.toFixed(2)}s`); 99 + originalConsole.error(` Throughput: ${opsPerSec} ops/sec | ${mbPerSec} MB/s`); 100 + originalConsole.error(` Threads: ${threads}`); 101 + } else { 102 + // Single-threaded mode 103 + const mod = await import(modulePath); 104 + const userFn = mode === 'detect' ? (mod.detect || mod.default) : (mod.process || mod.default); 105 + 106 + const finalStats = await bundle.processBundles( 107 + start, 108 + end, 109 + (op, position, bundleNum, line) => { 110 + if (mode === 'detect') { 111 + const labels = userFn({ op }); 112 + 113 + if (labels && labels.length > 0) { 114 + matchCount++; 115 + matchedBytes += line.length; 116 + if (onMatch) { 117 + onMatch({ 118 + bundle: bundleNum, 119 + position, 120 + cid: op.cid.slice(-4), 121 + size: line.length, 122 + labels 123 + }, matchCount, matchedBytes); 124 + } 125 + } 126 + } else { 127 + // Process mode - just call function 128 + userFn({ op, position, bundle: bundleNum, line }); 129 + } 130 + }, 131 + { 132 + onProgress: (progressStats: ProcessStats) => { 133 + const elapsed = (Date.now() - startTime) / 1000; 134 + const opsPerSec = (progressStats.totalOps / elapsed).toFixed(0); 135 + const mbPerSec = (progressStats.totalBytes / elapsed / 1e6).toFixed(1); 136 + originalConsole.error(`Processed ${progressStats.totalOps} ops | ${opsPerSec} ops/sec | ${mbPerSec} MB/s\r`); 137 + }, 138 + } 139 + ); 140 + 141 + const elapsed = (Date.now() - startTime) / 1000; 142 + const opsPerSec = (finalStats.totalOps / elapsed).toFixed(0); 143 + const mbPerSec = (finalStats.totalBytes / elapsed / 1e6).toFixed(1); 144 + 145 + originalConsole.error('\n'); 146 + originalConsole.error(`✓ ${mode === 'detect' ? 'Detection' : 'Processing'} complete`); 147 + originalConsole.error(` Total operations: ${finalStats.totalOps.toLocaleString()}`); 148 + 149 + if (mode === 'detect') { 150 + originalConsole.error(` Matches found: ${matchCount.toLocaleString()} (${(matchCount/finalStats.totalOps*100).toFixed(2)}%)`); 151 + originalConsole.error(` Matched size: ${(matchedBytes / 1e6).toFixed(1)} MB (${(matchedBytes/finalStats.totalBytes*100).toFixed(2)}%)`); 152 + } 153 + 154 + originalConsole.error(` Total size: ${(finalStats.totalBytes / 1e6).toFixed(1)} MB`); 155 + originalConsole.error(''); 156 + originalConsole.error(` Time elapsed: ${elapsed.toFixed(2)}s`); 157 + originalConsole.error(` Throughput: ${opsPerSec} ops/sec | ${mbPerSec} MB/s`); 158 + if (threads > 1) { 159 + originalConsole.error(` Threads: ${threads}`); 160 + } 161 + } 162 + } finally { 163 + // Restore console 164 + console.log = originalConsole.log; 165 + console.error = originalConsole.error; 166 + console.warn = originalConsole.warn; 167 + console.info = originalConsole.info; 168 + console.debug = originalConsole.debug; 169 + } 170 + } 171 + 172 + /** 173 + * Common argument parsing for process/detect commands 174 + */ 175 + export function parseProcessArgs(args: string[]) { 176 + const { values, positionals } = parseArgs({ 177 + args, 178 + options: { 179 + dir: { type: 'string', default: './' }, 180 + bundles: { type: 'string' }, 181 + threads: { type: 'string', default: '1' }, 182 + silent: { type: 'boolean', default: false }, 183 + s: { type: 'boolean', default: false }, 184 + flush: { type: 'boolean', default: false }, 185 + }, 186 + strict: false, 187 + allowPositionals: true, 188 + }); 189 + 190 + return { values, positionals }; 191 + } 192 + 193 + /** 194 + * Parse bundle selection from values 195 + */ 196 + export async function parseBundleSelection( 197 + values: any, 198 + bundle: PLCBundle 199 + ): Promise<{ start: number; end: number }> { 200 + const stats = await bundle.getStats(); 201 + 202 + let start: number, end: number; 203 + 204 + if (values.bundles && typeof values.bundles === 'string') { 205 + const bundleSpec = values.bundles; 206 + 207 + if (bundleSpec.includes('-')) { 208 + const [startStr, endStr] = bundleSpec.split('-'); 209 + start = parseInt(startStr.trim()); 210 + end = parseInt(endStr.trim()); 211 + 212 + if (isNaN(start) || isNaN(end)) { 213 + throw new Error(`Invalid bundle range: ${bundleSpec}`); 214 + } 215 + } else { 216 + start = parseInt(bundleSpec); 217 + end = start; 218 + 219 + if (isNaN(start)) { 220 + throw new Error(`Invalid bundle number: ${bundleSpec}`); 221 + } 222 + } 223 + } else { 224 + start = 1; 225 + end = stats.lastBundle; 226 + } 227 + 228 + if (start < 1 || end > stats.lastBundle || start > end) { 229 + throw new Error(`Invalid bundle range ${start}-${end} (available: 1-${stats.lastBundle})`); 230 + } 231 + 232 + return { start, end }; 233 + }
+34 -111
src/cmds/detect.ts
··· 1 - import { parseArgs } from 'util'; 1 + import { parseProcessArgs, parseBundleSelection, processOperations } from './common'; 2 2 import { PLCBundle } from '../plcbundle'; 3 - import type { ProcessStats } from '../types'; 4 3 5 4 export async function detect(args: string[]) { 6 5 if (args.includes('-h') || args.includes('--help')) { 7 6 console.log(` 8 7 detect - Detect and filter operations using a custom function 9 8 9 + USAGE: 10 + plcbundle-bun detect <module> [options] 11 + 12 + ARGUMENTS: 13 + <module> Path to detect function module (required) 14 + 10 15 OPTIONS: 11 16 --dir <path> Bundle directory (default: ./) 12 17 --bundles <spec> Bundle selection: number (42) or range (1-50) 13 - If not specified, processes all available bundles 14 - --detect <path> Path to detect function module (required) 15 18 --threads <num> Number of worker threads (default: 1) 19 + --flush Output matches immediately (unsorted) 20 + -s, --silent Suppress all console output from detect script 16 21 17 22 EXAMPLES: 18 - bun cli detect --detect ./detect.ts # All bundles 19 - bun cli detect --detect ./detect.ts --bundles 42 # Single bundle 20 - bun cli detect --detect ./detect.ts --bundles 1-50 # Range 21 - bun cli detect --detect ./detect.ts --threads 4 # Multi-threaded 23 + plcbundle-bun detect ./detect.ts 24 + plcbundle-bun detect ./detect.ts --bundles 1-50 --threads 4 25 + plcbundle-bun detect ./detect.ts --flush --silent 22 26 `); 23 27 return; 24 28 } 25 29 26 - const { values } = parseArgs({ 27 - args, 28 - options: { 29 - dir: { type: 'string', default: './' }, 30 - bundles: { type: 'string' }, 31 - detect: { type: 'string' }, 32 - threads: { type: 'string', default: '1' }, 33 - }, 34 - strict: false, 35 - }); 30 + const { values, positionals } = parseProcessArgs(args); 31 + const modulePath = positionals[0]; 32 + 33 + if (!modulePath) { 34 + console.error('Error: module path is required'); 35 + console.error('Usage: plcbundle-bun detect <module> [options]'); 36 + process.exit(1); 37 + } 36 38 37 39 const dir = (values.dir as string) || './'; 38 40 const threads = parseInt((values.threads as string) || '1'); 41 + const silent = Boolean(values.silent || values.s); 42 + const flush = Boolean(values.flush); 39 43 40 - if (!values.detect || typeof values.detect !== 'string') { 41 - console.error('Error: --detect is required'); 42 - process.exit(1); 43 - } 44 - 45 - // Load bundle instance to get index 46 44 const bundle = new PLCBundle(dir); 47 - const stats = await bundle.getStats(); 45 + const { start, end } = await parseBundleSelection(values, bundle); 46 + const resolvedPath = Bun.resolveSync(modulePath, process.cwd()); 48 47 49 - // Parse bundle selection 50 - let start: number, end: number; 51 - 52 - if (values.bundles && typeof values.bundles === 'string') { 53 - const bundleSpec = values.bundles; 54 - 55 - if (bundleSpec.includes('-')) { 56 - const [startStr, endStr] = bundleSpec.split('-'); 57 - start = parseInt(startStr.trim()); 58 - end = parseInt(endStr.trim()); 59 - 60 - if (isNaN(start) || isNaN(end)) { 61 - console.error(`Error: Invalid bundle range: ${bundleSpec}`); 62 - process.exit(1); 63 - } 64 - } else { 65 - start = parseInt(bundleSpec); 66 - end = start; 67 - 68 - if (isNaN(start)) { 69 - console.error(`Error: Invalid bundle number: ${bundleSpec}`); 70 - process.exit(1); 71 - } 72 - } 73 - } else { 74 - start = 1; 75 - end = stats.lastBundle; 76 - } 77 - 78 - // Validate range 79 - if (start < 1 || end > stats.lastBundle || start > end) { 80 - console.error(`Error: Invalid bundle range ${start}-${end} (available: 1-${stats.lastBundle})`); 81 - process.exit(1); 82 - } 83 - 84 - // Resolve and load detect function 85 - const detectPath = Bun.resolveSync(values.detect, process.cwd()); 86 - const mod = await import(detectPath); 87 - const detectFn = mod.detect || mod.default; 88 - 89 - console.error(`Processing bundles ${start}-${end} from ${dir}${threads > 1 ? ` (${threads} threads)` : ''}\n`); 90 - console.log('bundle,position,cid,size,confidence,labels'); 91 - 92 - let matchCount = 0; 93 - let matchedBytes = 0; 94 - const startTime = Date.now(); 95 - 96 - // Process bundles with progress 97 - const finalStats = await bundle.processBundles( 48 + await processOperations({ 49 + dir, 98 50 start, 99 51 end, 100 - (op, position, bundleNum) => { 101 - const opSize = JSON.stringify(op).length; 102 - const labels = detectFn({ op }); 103 - 104 - if (labels && labels.length > 0) { 105 - matchCount++; 106 - matchedBytes += opSize; 107 - const cidShort = op.cid.slice(-4); 108 - console.log(`${bundleNum},${position},${cidShort},${opSize},0.95,${labels.join(';')}`); 109 - } 52 + modulePath: resolvedPath, 53 + threads, 54 + silent, 55 + flush, 56 + mode: 'detect', 57 + onMatch: (match) => { 58 + console.log(`${match.bundle},${match.position},${match.cid},${match.size},0.95,${match.labels.join(';')}`); 110 59 }, 111 - { 112 - threads, 113 - onProgress: (progressStats: ProcessStats) => { 114 - const elapsed = (Date.now() - startTime) / 1000; 115 - const opsPerSec = (progressStats.totalOps / elapsed).toFixed(0); 116 - const mbPerSec = (progressStats.totalBytes / elapsed / 1e6).toFixed(1); 117 - console.error(`Processed ${progressStats.totalOps} ops | ${opsPerSec} ops/sec | ${mbPerSec} MB/s\r`); 118 - }, 119 - } 120 - ); 121 - 122 - const elapsed = (Date.now() - startTime) / 1000; 123 - const opsPerSec = (finalStats.totalOps / elapsed).toFixed(0); 124 - const mbPerSec = (finalStats.totalBytes / elapsed / 1e6).toFixed(1); 125 - 126 - console.error('\n'); 127 - console.error('✓ Detection complete'); 128 - console.error(` Total operations: ${finalStats.totalOps.toLocaleString()}`); 129 - console.error(` Matches found: ${matchCount.toLocaleString()} (${(matchCount/finalStats.totalOps*100).toFixed(2)}%)`); 130 - console.error(` Total size: ${(finalStats.totalBytes / 1e6).toFixed(1)} MB`); 131 - console.error(` Matched size: ${(matchedBytes / 1e6).toFixed(1)} MB (${(matchedBytes/finalStats.totalBytes*100).toFixed(2)}%)`); 132 - console.error(''); 133 - console.error(` Time elapsed: ${elapsed.toFixed(2)}s`); 134 - console.error(` Throughput: ${opsPerSec} ops/sec | ${mbPerSec} MB/s`); 135 - if (threads > 1) { 136 - console.error(` Threads: ${threads}`); 137 - } 60 + }); 138 61 }
+59
src/cmds/process.ts
··· 1 + import { parseProcessArgs, parseBundleSelection, processOperations } from './common'; 2 + import { PLCBundle } from '../plcbundle'; 3 + 4 + export async function process(args: string[]) { 5 + if (args.includes('-h') || args.includes('--help')) { 6 + console.log(` 7 + process - Process operations with a custom function 8 + 9 + USAGE: 10 + plcbundle-bun process <module> [options] 11 + 12 + ARGUMENTS: 13 + <module> Path to process function module (required) 14 + 15 + OPTIONS: 16 + --dir <path> Bundle directory (default: ./) 17 + --bundles <spec> Bundle selection: number (42) or range (1-50) 18 + --threads <num> Number of worker threads (default: 1) 19 + -s, --silent Suppress all console output from process script 20 + 21 + EXAMPLES: 22 + plcbundle-bun process ./my-processor.ts 23 + plcbundle-bun process ./my-processor.ts --bundles 1-50 --threads 4 24 + 25 + PROCESS FUNCTION: 26 + export function process({ op, position, bundle, line }) { 27 + // Your logic here 28 + } 29 + `); 30 + return; 31 + } 32 + 33 + const { values, positionals } = parseProcessArgs(args); 34 + const modulePath = positionals[0]; 35 + 36 + if (!modulePath) { 37 + console.error('Error: module path is required'); 38 + console.error('Usage: plcbundle-bun process <module> [options]'); 39 + process.exit(1); 40 + } 41 + 42 + const dir = (values.dir as string) || './'; 43 + const threads = parseInt((values.threads as string) || '1'); 44 + const silent = Boolean(values.silent || values.s); 45 + 46 + const bundle = new PLCBundle(dir); 47 + const { start, end } = await parseBundleSelection(values, bundle); 48 + const resolvedPath = Bun.resolveSync(modulePath, process.cwd()); 49 + 50 + await processOperations({ 51 + dir, 52 + start, 53 + end, 54 + modulePath: resolvedPath, 55 + threads, 56 + silent, 57 + mode: 'process', 58 + }); 59 + }
+180 -46
src/plcbundle.ts
··· 244 244 * } 245 245 * ``` 246 246 */ 247 - async *streamOperations(bundleNum: number): AsyncGenerator<Operation> { 247 + async *streamOperations(bundleNum: number): AsyncGenerator<{ op: Operation; line: string }> { 248 248 const content = await this.readBundle(bundleNum); 249 249 const lines = content.split('\n'); 250 250 251 251 for (const line of lines) { 252 252 if (line.trim()) { 253 - yield JSON.parse(line); 253 + yield { op: JSON.parse(line), line }; 254 254 } 255 255 } 256 256 } 257 257 258 258 /** 259 - * Process multiple bundles with a callback function. 259 + * Process multiple bundles with a callback or module. 260 260 * 261 - * Supports both single-threaded and multi-threaded processing via {@link ProcessOptions}. 262 - * Operations are processed in chronological order. 261 + * Two modes: 262 + * 1. **Direct callback** (single-threaded): Pass callback function 263 + * 2. **Module path** (multi-threaded): Pass module path in options 263 264 * 264 - * @param start - First bundle number to process (inclusive) 265 - * @param end - Last bundle number to process (inclusive) 266 - * @param callback - Function called for each operation 267 - * @param options - Processing options (threads, progress callback) 265 + * @param start - First bundle number 266 + * @param end - Last bundle number 267 + * @param callbackOrOptions - Callback function OR options object with module path 268 + * @param options - Processing options (only if callback is provided) 268 269 * @returns Promise resolving to processing statistics 269 270 * 270 - * @example Single-threaded 271 + * @example Single-threaded with callback 271 272 * ```ts 272 - * await bundle.processBundles(1, 10, (op, pos, num) => { 273 - * console.log(`Bundle ${num}, pos ${pos}: ${op.did}`); 273 + * await bundle.processBundles(1, 10, (op, pos, num, line) => { 274 + * console.log(op.did); 274 275 * }); 275 276 * ``` 276 277 * 277 - * @example Multi-threaded with progress 278 + * @example Multi-threaded with module 278 279 * ```ts 279 - * await bundle.processBundles(1, 100, (op) => { 280 - * // Process operation 281 - * }, { 280 + * await bundle.processBundles(1, 100, { 281 + * module: './detect.ts', 282 282 * threads: 4, 283 - * onProgress: (stats) => { 284 - * console.log(`Processed ${stats.totalOps} operations`); 285 - * } 283 + * onProgress: (stats) => console.log(stats.totalOps) 286 284 * }); 287 285 * ``` 288 286 */ 289 287 async processBundles( 290 288 start: number, 291 289 end: number, 292 - callback: ProcessCallback, 293 - options: ProcessOptions = {} 294 - ): Promise<ProcessStats> { 295 - const { threads = 1, onProgress } = options; 290 + callbackOrOptions: ProcessCallback | ProcessOptions, 291 + options?: ProcessOptions 292 + ): Promise<ProcessStats & { matches?: any[] }> { 293 + let callback: ProcessCallback | undefined; 294 + let processOptions: ProcessOptions; 296 295 297 - if (threads > 1) { 298 - return await this.processBundlesMultiThreaded(start, end, callback, threads, onProgress); 296 + if (typeof callbackOrOptions === 'function') { 297 + callback = callbackOrOptions; 298 + processOptions = options || {}; 299 299 } else { 300 - return await this.processBundlesSingleThreaded(start, end, callback, onProgress); 300 + processOptions = callbackOrOptions; 301 + } 302 + 303 + const { threads = 1, module, silent = false, flush = false, onProgress, onMatch } = processOptions; 304 + 305 + // Validation: multi-threading requires module 306 + if (threads > 1 && !module) { 307 + throw new Error('Multi-threading requires module path. Use: processBundles(start, end, { module: "./detect.ts", threads: 4 })'); 308 + } 309 + 310 + // Use workers for multi-threading with module 311 + if (threads > 1 && module) { 312 + return await this.processBundlesWorkers(start, end, module, threads, silent, flush, onProgress, onMatch); 301 313 } 314 + 315 + // Load module if provided but single-threaded 316 + if (module && !callback) { 317 + const resolvedPath = Bun.resolveSync(module, process.cwd()); 318 + const mod = await import(resolvedPath); 319 + const detectFn = mod.detect || mod.default; 320 + 321 + callback = (op) => { 322 + detectFn({ op }); 323 + }; 324 + } 325 + 326 + if (!callback) { 327 + throw new Error('Either callback function or module path must be provided'); 328 + } 329 + 330 + // Single-threaded fast path 331 + return await this.processBundlesFast(start, end, callback, onProgress); 302 332 } 303 333 304 334 /** 305 - * Single-threaded processing 335 + * Multi-threaded processing using workers (optimized) 306 336 */ 307 - private async processBundlesSingleThreaded( 337 + private async processBundlesWorkers( 338 + start: number, 339 + end: number, 340 + modulePath: string, 341 + threads: number, 342 + silent: boolean, 343 + flush: boolean, 344 + onProgress?: (stats: ProcessStats) => void, 345 + onMatch?: (match: any) => void 346 + ): Promise<ProcessStats & { matches?: any[] }> { 347 + const totalBundles = end - start + 1; 348 + const bundlesPerThread = Math.ceil(totalBundles / threads); 349 + 350 + const workerPath = new URL('./worker.ts', import.meta.url).pathname; 351 + const workers: Worker[] = []; 352 + const workerStats: Array<{ totalOps: number; totalBytes: number }> = []; 353 + 354 + let aggregatedOps = 0; 355 + let aggregatedBytes = 0; 356 + const allMatches: any[] = []; 357 + 358 + const workerPromises = []; 359 + 360 + for (let i = 0; i < threads; i++) { 361 + const threadStart = start + i * bundlesPerThread; 362 + const threadEnd = Math.min(threadStart + bundlesPerThread - 1, end); 363 + 364 + if (threadStart > end) break; 365 + 366 + const worker = new Worker(workerPath); 367 + workers.push(worker); 368 + 369 + workerStats[i] = { totalOps: 0, totalBytes: 0 }; 370 + 371 + const promise = new Promise<any>((resolve) => { 372 + worker.onmessage = (event) => { 373 + const msg = event.data; 374 + 375 + if (msg.type === 'progress') { 376 + const oldStats = workerStats[i]; 377 + aggregatedOps += msg.totalOps - oldStats.totalOps; 378 + aggregatedBytes += msg.totalBytes - oldStats.totalBytes; 379 + workerStats[i] = { totalOps: msg.totalOps, totalBytes: msg.totalBytes }; 380 + 381 + if (onProgress) { 382 + onProgress({ 383 + totalOps: aggregatedOps, 384 + matchCount: 0, 385 + totalBytes: aggregatedBytes, 386 + matchedBytes: 0, 387 + }); 388 + } 389 + } else if (msg.type === 'match') { 390 + // Handle flushed match - call callback immediately 391 + if (onMatch) { 392 + onMatch(msg); 393 + } 394 + } else if (msg.type === 'result') { 395 + resolve(msg); 396 + } 397 + }; 398 + }); 399 + 400 + workerPromises.push(promise); 401 + 402 + worker.postMessage({ 403 + dir: this.dir, 404 + start: threadStart, 405 + end: threadEnd, 406 + modulePath, 407 + silent, 408 + flush, 409 + }); 410 + } 411 + 412 + // Wait for all workers 413 + const results = await Promise.all(workerPromises); 414 + 415 + // Cleanup 416 + workers.forEach(w => w.terminate()); 417 + 418 + // Aggregate results 419 + let totalOps = 0; 420 + let totalBytes = 0; 421 + 422 + for (const result of results) { 423 + totalOps += result.totalOps; 424 + totalBytes += result.totalBytes; 425 + if (!flush) { 426 + allMatches.push(...result.matches); 427 + } 428 + } 429 + 430 + // Sort matches if not flushed 431 + if (!flush) { 432 + allMatches.sort((a, b) => { 433 + if (a.bundle !== b.bundle) return a.bundle - b.bundle; 434 + return a.position - b.position; 435 + }); 436 + } 437 + 438 + return { 439 + totalOps, 440 + matchCount: 0, 441 + totalBytes, 442 + matchedBytes: 0, 443 + matches: flush ? undefined : allMatches, 444 + }; 445 + } 446 + 447 + /** 448 + * Fast single-threaded processing (optimized) 449 + */ 450 + private async processBundlesFast( 308 451 start: number, 309 452 end: number, 310 453 callback: ProcessCallback, ··· 318 461 }; 319 462 320 463 for (let bundleNum = start; bundleNum <= end; bundleNum++) { 321 - let position = 0; 464 + const content = await this.readBundle(bundleNum); 465 + const lines = content.split('\n'); 322 466 323 - for await (const op of this.streamOperations(bundleNum)) { 467 + for (let position = 0; position < lines.length; position++) { 468 + const line = lines[position]; 469 + if (!line.trim()) continue; 470 + 324 471 stats.totalOps++; 325 - stats.totalBytes += JSON.stringify(op).length; 472 + stats.totalBytes += line.length; 326 473 327 - await callback(op, position++, bundleNum); 474 + const op = JSON.parse(line); 475 + await callback(op, position, bundleNum, line); 328 476 329 477 if (onProgress && stats.totalOps % 10000 === 0) { 330 478 onProgress({ ...stats }); ··· 333 481 } 334 482 335 483 return stats; 336 - } 337 - 338 - /** 339 - * Multi-threaded processing 340 - */ 341 - private async processBundlesMultiThreaded( 342 - start: number, 343 - end: number, 344 - callback: ProcessCallback, 345 - threads: number, 346 - onProgress?: (stats: ProcessStats) => void 347 - ): Promise<ProcessStats> { 348 - // Simplified implementation - full multi-threading requires callback serialization 349 - return await this.processBundlesSingleThreaded(start, end, callback, onProgress); 350 484 } 351 485 352 486 /**
+95 -15
src/types.ts
··· 105 105 * @param op - The operation being processed 106 106 * @param position - Zero-based position of the operation within its bundle 107 107 * @param bundleNum - The bundle number being processed 108 - * 109 - * @example 110 - * ```ts 111 - * const callback: ProcessCallback = (op, position, bundleNum) => { 112 - * if (op.did.startsWith('did:plc:test')) { 113 - * console.log(`Found test DID at bundle ${bundleNum}, position ${position}`); 114 - * } 115 - * }; 116 - * ``` 108 + * @param line - The raw JSONL line (for getting size without re-serializing) 117 109 */ 118 110 export type ProcessCallback = ( 119 111 op: Operation, 120 112 position: number, 121 - bundleNum: number 113 + bundleNum: number, 114 + line: string 122 115 ) => void | Promise<void>; 123 116 124 117 /** ··· 141 134 } 142 135 143 136 /** 144 - * Options for processing bundles. 137 + * Unified options for processing bundles. 145 138 * 146 - * Allows customization of parallel processing and progress reporting. 139 + * Supports both callback functions (single-threaded) and module paths (multi-threaded). 147 140 */ 148 141 export interface ProcessOptions { 149 - /** Number of worker threads to use for parallel processing (default: 1) */ 142 + /** 143 + * Number of worker threads (default: 1). 144 + * If > 1, requires `module` instead of passing callback directly. 145 + */ 150 146 threads?: number; 151 147 152 148 /** 153 - * Callback invoked periodically to report processing progress. 154 - * Called approximately every 10,000 operations. 149 + * Path to module exporting a `detect` or default function. 150 + * Required when threads > 1. The module should export: 151 + * ```ts 152 + * export function detect({ op }) { return [...labels]; } 153 + * ``` 154 + */ 155 + module?: string; 156 + 157 + /** 158 + * Suppress all console output from the detect script (default: false). 159 + * 160 + * When enabled, console.log, console.error, etc. from the detect function 161 + * are silenced. Progress reporting and final statistics are still shown. 162 + * 163 + * @example 164 + * ```ts 165 + * await bundle.processBundles(1, 10, { 166 + * module: './noisy-detect.ts', 167 + * silent: true // Suppress debug output 168 + * }); 169 + * ``` 170 + */ 171 + silent?: boolean; 172 + 173 + /** 174 + * Output matches immediately without buffering or sorting (default: false). 175 + * 176 + * When enabled, matches are output as soon as they're found rather than 177 + * being collected, sorted, and output at the end. Useful for: 178 + * - Real-time streaming output 179 + * - Reducing memory usage with large result sets 180 + * - Early access to results 181 + * 182 + * Note: With flush enabled, matches may be out of chronological order 183 + * when using multiple threads. 184 + * 185 + * @example 186 + * ```ts 187 + * await bundle.processBundles(1, 100, { 188 + * module: './detect.ts', 189 + * threads: 4, 190 + * flush: true, 191 + * onMatch: (match) => { 192 + * console.log(`Found: ${match.bundle}/${match.position}`); 193 + * } 194 + * }); 195 + * ``` 196 + */ 197 + flush?: boolean; 198 + 199 + /** 200 + * Progress callback invoked periodically during processing. 201 + * 202 + * Called approximately every 10,000 operations to report current 203 + * processing statistics. 155 204 * 156 205 * @param stats - Current processing statistics 206 + * 207 + * @example 208 + * ```ts 209 + * await bundle.processBundles(1, 100, callback, { 210 + * onProgress: (stats) => { 211 + * console.log(`${stats.totalOps} operations processed`); 212 + * } 213 + * }); 214 + * ``` 157 215 */ 158 216 onProgress?: (stats: ProcessStats) => void; 217 + 218 + /** 219 + * Match callback invoked for each match when flush mode is enabled. 220 + * 221 + * Only called when `flush: true`. Receives match objects as they're 222 + * found, without waiting for sorting. Ignored in non-flush mode. 223 + * 224 + * @param match - The match object with bundle, position, cid, size, and labels 225 + * 226 + * @example 227 + * ```ts 228 + * await bundle.processBundles(1, 100, { 229 + * module: './detect.ts', 230 + * flush: true, 231 + * onMatch: (match) => { 232 + * // Output immediately 233 + * console.log(`${match.bundle},${match.position},${match.labels}`); 234 + * } 235 + * }); 236 + * ``` 237 + */ 238 + onMatch?: (match: any) => void; 159 239 } 160 240 161 241 /**
+77 -46
src/worker.ts
··· 1 1 /// <reference lib="webworker" /> 2 2 3 - import { PLCBundle } from './plcbundle'; 4 - import type { Operation } from './types'; 5 - 6 3 export interface WorkerTask { 7 4 dir: string; 8 5 start: number; 9 6 end: number; 10 - detectPath: string; 7 + modulePath: string; 8 + silent?: boolean; 9 + flush?: boolean; 11 10 } 12 11 13 12 export interface WorkerProgress { 14 13 type: 'progress'; 15 14 totalOps: number; 16 - matchCount: number; 17 15 totalBytes: number; 18 - matchedBytes: number; 16 + } 17 + 18 + export interface WorkerMatch { 19 + type: 'match'; 20 + bundle: number; 21 + position: number; 22 + cid: string; 23 + size: number; 24 + labels: string[]; 19 25 } 20 26 21 27 export interface WorkerResult { 22 28 type: 'result'; 23 29 totalOps: number; 24 - matchCount: number; 25 30 totalBytes: number; 26 - matchedBytes: number; 27 31 matches: Array<{ 28 32 bundle: number; 29 33 position: number; ··· 33 37 }>; 34 38 } 35 39 36 - // Worker message handler 37 40 self.onmessage = async (event: MessageEvent<WorkerTask>) => { 38 - const { dir, start, end, detectPath } = event.data; 41 + const { dir, start, end, modulePath, silent, flush } = event.data; 39 42 40 - const bundle = new PLCBundle(dir); 43 + // Override console if silent 44 + if (silent) { 45 + globalThis.console = { 46 + log: () => {}, 47 + error: () => {}, 48 + warn: () => {}, 49 + info: () => {}, 50 + debug: () => {}, 51 + trace: () => {}, 52 + } as any; 53 + } 41 54 42 55 // Load detect function 43 - const mod = await import(detectPath); 44 - const detect = mod.detect || mod.default; 56 + const mod = await import(modulePath); 57 + const detectFn = mod.detect || mod.default; 45 58 46 59 let totalOps = 0; 47 - let matchCount = 0; 48 60 let totalBytes = 0; 49 - let matchedBytes = 0; 50 61 const matches: any[] = []; 51 62 52 - await bundle.processBundles(start, end, (op: Operation, position: number, bundleNum: number) => { 53 - totalOps++; 54 - const opSize = JSON.stringify(op).length; 55 - totalBytes += opSize; 63 + for (let bundleNum = start; bundleNum <= end; bundleNum++) { 64 + const bundlePath = `${dir}${bundleNum.toString().padStart(6, '0')}.jsonl.zst`; 56 65 57 - const labels = detect({ op }); 58 - 59 - if (labels && labels.length > 0) { 60 - matchCount++; 61 - matchedBytes += opSize; 66 + try { 67 + const compressed = await Bun.file(bundlePath).arrayBuffer(); 68 + const decompressed = Bun.zstdDecompressSync(compressed); 69 + const text = new TextDecoder().decode(decompressed); 70 + 71 + const lines = text.split('\n'); 62 72 63 - matches.push({ 64 - bundle: bundleNum, 65 - position, 66 - cid: op.cid.slice(-4), 67 - size: opSize, 68 - labels, 69 - }); 70 - } 71 - 72 - // Send progress every 10000 operations 73 - if (totalOps % 10000 === 0) { 74 - self.postMessage({ 75 - type: 'progress', 76 - totalOps, 77 - matchCount, 78 - totalBytes, 79 - matchedBytes, 80 - } as WorkerProgress); 73 + for (let position = 0; position < lines.length; position++) { 74 + const line = lines[position]; 75 + if (!line.trim()) continue; 76 + 77 + totalOps++; 78 + totalBytes += line.length; 79 + 80 + const op = JSON.parse(line); 81 + const labels = detectFn({ op }); 82 + 83 + if (labels && labels.length > 0) { 84 + const match = { 85 + bundle: bundleNum, 86 + position, 87 + cid: op.cid.slice(-4), 88 + size: line.length, 89 + labels, 90 + }; 91 + 92 + if (flush) { 93 + // Send match immediately 94 + self.postMessage({ 95 + type: 'match', 96 + ...match, 97 + } as WorkerMatch); 98 + } else { 99 + // Buffer matches 100 + matches.push(match); 101 + } 102 + } 103 + 104 + if (totalOps % 10000 === 0) { 105 + self.postMessage({ 106 + type: 'progress', 107 + totalOps, 108 + totalBytes, 109 + } as WorkerProgress); 110 + } 111 + } 112 + } catch (error) { 113 + // Continue on error 81 114 } 82 - }); 115 + } 83 116 84 117 // Send final result 85 118 self.postMessage({ 86 119 type: 'result', 87 120 totalOps, 88 - matchCount, 89 121 totalBytes, 90 - matchedBytes, 91 - matches, 122 + matches: flush ? [] : matches, 92 123 } as WorkerResult); 93 124 };
+78
tests/commands.test.ts
··· 1 + import { describe, test, expect, beforeEach } from 'bun:test'; 2 + import { PLCBundle } from '../src/plcbundle'; 3 + import { TEMP_DIR, createMockIndex, createMockOperations } from './setup'; 4 + import { resolve } from 'path'; 5 + 6 + describe('CLI Commands', () => { 7 + let detectModulePath: string; 8 + let processModulePath: string; 9 + 10 + beforeEach(async () => { 11 + const bundle = new PLCBundle(TEMP_DIR); 12 + const mockIndex = createMockIndex(); 13 + await bundle.saveIndex(mockIndex); 14 + 15 + // Create test bundles 16 + for (let i = 1; i <= 3; i++) { 17 + const operations = createMockOperations(100); 18 + const jsonl = operations.map(op => JSON.stringify(op)).join('\n') + '\n'; 19 + const compressed = Bun.zstdCompressSync(new TextEncoder().encode(jsonl)); 20 + await Bun.write(bundle.getBundlePath(i), compressed); 21 + } 22 + 23 + // Create test modules with Bun.resolveSync 24 + detectModulePath = Bun.resolveSync(`${TEMP_DIR}/test-detect.ts`, process.cwd()); 25 + await Bun.write(detectModulePath, ` 26 + export function detect({ op }) { 27 + return op.did.includes('test') ? ['test'] : []; 28 + } 29 + `); 30 + 31 + processModulePath = Bun.resolveSync(`${TEMP_DIR}/test-process.ts`, process.cwd()); 32 + await Bun.write(processModulePath, ` 33 + let count = 0; 34 + export function process({ op }) { 35 + count++; 36 + } 37 + `); 38 + }); 39 + 40 + describe('detect command', () => { 41 + test('module can be imported and has detect function', async () => { 42 + const mod = await import(detectModulePath); 43 + expect(mod.detect).toBeDefined(); 44 + expect(typeof mod.detect).toBe('function'); 45 + 46 + const result = mod.detect({ op: { did: 'test123' } }); 47 + expect(Array.isArray(result)).toBe(true); 48 + }); 49 + }); 50 + 51 + describe('process command', () => { 52 + test('module can be imported and has process function', async () => { 53 + const mod = await import(processModulePath); 54 + expect(mod.process).toBeDefined(); 55 + expect(typeof mod.process).toBe('function'); 56 + }); 57 + }); 58 + 59 + describe('module loading', () => { 60 + test('detect module returns labels array', async () => { 61 + const mod = await import(detectModulePath); 62 + 63 + const result1 = mod.detect({ op: { did: 'did:plc:test123' } }); 64 + expect(result1).toContain('test'); 65 + 66 + const result2 = mod.detect({ op: { did: 'did:plc:abc' } }); 67 + expect(result2).toEqual([]); 68 + }); 69 + 70 + test('process module executes without errors', async () => { 71 + const mod = await import(processModulePath); 72 + 73 + // Should not throw 74 + mod.process({ op: { did: 'test' }, position: 0, bundle: 1, line: '{}' }); 75 + expect(true).toBe(true); 76 + }); 77 + }); 78 + });
+78
tests/common.test.ts
··· 1 + import { describe, test, expect, beforeEach } from 'bun:test'; 2 + import { parseProcessArgs, parseBundleSelection } from '../src/cmds/common'; 3 + import { PLCBundle } from '../src/plcbundle'; 4 + import { TEMP_DIR, createMockIndex } from './setup'; 5 + 6 + describe('Common CLI Logic', () => { 7 + describe('parseProcessArgs', () => { 8 + test('parses positional arguments', () => { 9 + const { values, positionals } = parseProcessArgs(['./detect.ts', '--threads', '4']); 10 + 11 + expect(positionals[0]).toBe('./detect.ts'); 12 + expect(values.threads).toBe('4'); 13 + }); 14 + 15 + test('parses boolean flags', () => { 16 + const { values } = parseProcessArgs(['./detect.ts', '--silent', '--flush']); 17 + 18 + expect(values.silent).toBe(true); 19 + expect(values.flush).toBe(true); 20 + }); 21 + 22 + test('handles short flags', () => { 23 + const { values } = parseProcessArgs(['./detect.ts', '-s']); 24 + 25 + expect(values.s).toBe(true); 26 + }); 27 + }); 28 + 29 + describe('parseBundleSelection', () => { 30 + let bundle: PLCBundle; 31 + 32 + beforeEach(async () => { 33 + bundle = new PLCBundle(TEMP_DIR); 34 + const mockIndex = createMockIndex(); 35 + await bundle.saveIndex(mockIndex); 36 + }); 37 + 38 + test('defaults to all bundles', async () => { 39 + const { start, end } = await parseBundleSelection({}, bundle); 40 + 41 + expect(start).toBe(1); 42 + expect(end).toBe(3); // From mock index 43 + }); 44 + 45 + test('parses single bundle', async () => { 46 + const { start, end } = await parseBundleSelection({ bundles: '2' }, bundle); 47 + 48 + expect(start).toBe(2); 49 + expect(end).toBe(2); 50 + }); 51 + 52 + test('parses bundle range', async () => { 53 + const { start, end } = await parseBundleSelection({ bundles: '1-3' }, bundle); 54 + 55 + expect(start).toBe(1); 56 + expect(end).toBe(3); 57 + }); 58 + 59 + test('throws error for invalid range', async () => { 60 + try { 61 + await parseBundleSelection({ bundles: '1-999' }, bundle); 62 + expect(true).toBe(false); // Should throw 63 + } catch (error) { 64 + expect(error).toBeDefined(); 65 + expect((error as Error).message).toContain('Invalid bundle range'); 66 + } 67 + }); 68 + 69 + test('throws error for invalid format', async () => { 70 + try { 71 + await parseBundleSelection({ bundles: 'invalid' }, bundle); 72 + expect(true).toBe(false); // Should throw 73 + } catch (error) { 74 + expect(error).toBeDefined(); 75 + } 76 + }); 77 + }); 78 + });
+101
tests/multithread.test.ts
··· 1 + import { describe, test, expect, beforeEach } from 'bun:test'; 2 + import { PLCBundle } from '../src/plcbundle'; 3 + import { TEMP_DIR, createMockIndex, createMockOperations } from './setup'; 4 + 5 + describe('Multi-threaded Processing', () => { 6 + let bundle: PLCBundle; 7 + let modulePath: string; 8 + 9 + beforeEach(async () => { 10 + bundle = new PLCBundle(TEMP_DIR); 11 + 12 + const mockIndex = createMockIndex(); 13 + await bundle.saveIndex(mockIndex); 14 + 15 + // Create test bundles 16 + for (let i = 1; i <= 5; i++) { 17 + const operations = createMockOperations(100); 18 + const jsonl = operations.map(op => JSON.stringify(op)).join('\n') + '\n'; 19 + const compressed = Bun.zstdCompressSync(new TextEncoder().encode(jsonl)); 20 + await Bun.write(bundle.getBundlePath(i), compressed); 21 + } 22 + 23 + // Create test module with Bun.resolveSync 24 + modulePath = Bun.resolveSync(`${TEMP_DIR}/test-module.ts`, process.cwd()); 25 + await Bun.write(modulePath, ` 26 + export function detect({ op }) { 27 + return op.did.length > 10 ? ['long-did'] : []; 28 + } 29 + `); 30 + }); 31 + 32 + test('module loads correctly', async () => { 33 + const mod = await import(modulePath); 34 + expect(mod.detect).toBeDefined(); 35 + }); 36 + 37 + test('single-threaded processing works', async () => { 38 + const stats = await bundle.processBundles(1, 2, { 39 + module: modulePath, 40 + threads: 1, 41 + }); 42 + 43 + expect(stats.totalOps).toBe(200); // 2 bundles * 100 ops 44 + expect(stats.totalBytes).toBeGreaterThan(0); 45 + }); 46 + 47 + test('multi-threaded processing with 2 threads', async () => { 48 + const stats = await bundle.processBundles(1, 4, { 49 + module: modulePath, 50 + threads: 2, 51 + }); 52 + 53 + expect(stats.totalOps).toBe(400); // 4 bundles * 100 ops 54 + expect(stats.totalBytes).toBeGreaterThan(0); 55 + }); 56 + 57 + test('multi-threaded produces same op count as single-threaded', async () => { 58 + const stats1 = await bundle.processBundles(1, 3, { 59 + module: modulePath, 60 + threads: 1, 61 + }); 62 + 63 + const stats2 = await bundle.processBundles(1, 3, { 64 + module: modulePath, 65 + threads: 2, 66 + }); 67 + 68 + expect(stats1.totalOps).toBe(stats2.totalOps); 69 + expect(stats1.totalBytes).toBe(stats2.totalBytes); 70 + }); 71 + 72 + test('progress callback works with multi-threading', async () => { 73 + let progressCalls = 0; 74 + 75 + await bundle.processBundles(1, 5, { 76 + module: modulePath, 77 + threads: 2, 78 + onProgress: () => { 79 + progressCalls++; 80 + }, 81 + }); 82 + 83 + // May or may not be called depending on threshold 84 + expect(typeof progressCalls).toBe('number'); 85 + }); 86 + 87 + test('validates threads parameter', async () => { 88 + let errorThrown = false; 89 + 90 + try { 91 + await bundle.processBundles(1, 1, () => {}, { 92 + threads: 4, // Without module - should throw 93 + }); 94 + } catch (error) { 95 + errorThrown = true; 96 + expect((error as Error).message).toContain('module'); 97 + } 98 + 99 + expect(errorThrown).toBe(true); 100 + }); 101 + });
+223 -40
tests/processing.test.ts
··· 51 51 }); 52 52 }); 53 53 54 + describe('streamOperations', () => { 55 + test('streams operations from bundle', async () => { 56 + const operations = []; 57 + 58 + for await (const { op, line } of bundle.streamOperations(1)) { 59 + operations.push(op); 60 + expect(line).toBeDefined(); 61 + expect(line.length).toBeGreaterThan(0); 62 + } 63 + 64 + expect(operations.length).toBe(100); 65 + expect(operations[0].did).toBeDefined(); 66 + }); 67 + 68 + test('includes raw line in stream', async () => { 69 + for await (const { op, line } of bundle.streamOperations(1)) { 70 + expect(typeof line).toBe('string'); 71 + expect(line.trim().length).toBeGreaterThan(0); 72 + 73 + // Line should be valid JSON 74 + const parsed = JSON.parse(line); 75 + expect(parsed.did).toBe(op.did); 76 + break; // Just check first 77 + } 78 + }); 79 + }); 80 + 54 81 describe('processBundles', () => { 55 - test('calls callback for each operation', async () => { 82 + test('processes with callback function', async () => { 56 83 const callback = mock(() => {}); 57 84 58 - await bundle.processBundles(1, 1, callback, { threads: 1 }); 85 + const stats = await bundle.processBundles(1, 1, callback); 59 86 60 - // Should have been called for each operation (100 in our mock) 61 87 expect(callback).toHaveBeenCalled(); 62 88 expect(callback.mock.calls.length).toBe(100); 89 + expect(stats.totalOps).toBe(100); 63 90 }); 64 91 65 - test('tracks statistics', async () => { 92 + test('callback receives all parameters', async () => { 93 + const callback = mock((op: any, position: number, bundleNum: number, line: string) => { 94 + expect(op).toBeDefined(); 95 + expect(typeof position).toBe('number'); 96 + expect(typeof bundleNum).toBe('number'); 97 + expect(typeof line).toBe('string'); 98 + }); 99 + 100 + await bundle.processBundles(1, 1, callback); 101 + 102 + expect(callback).toHaveBeenCalled(); 103 + }); 104 + 105 + test('tracks statistics accurately', async () => { 66 106 const callback = mock(() => {}); 67 107 68 - const stats = await bundle.processBundles(1, 1, callback, { 69 - threads: 1, 70 - }); 108 + const stats = await bundle.processBundles(1, 1, callback); 71 109 72 - expect(stats).toBeDefined(); 73 - expect(stats.totalOps).toBe(100); // Our mock has 100 operations 110 + expect(stats.totalOps).toBe(100); 74 111 expect(stats.totalBytes).toBeGreaterThan(0); 112 + expect(stats.matchCount).toBe(0); 113 + expect(stats.matchedBytes).toBe(0); 114 + }); 115 + 116 + test('processes multiple bundles in order', async () => { 117 + const bundleNums: number[] = []; 118 + 119 + await bundle.processBundles(1, 3, (op, position, bundleNum) => { 120 + bundleNums.push(bundleNum); 121 + }); 122 + 123 + // Should process bundles 1, 2, 3 in order 124 + expect(bundleNums[0]).toBe(1); 125 + expect(bundleNums[99]).toBe(1); // Last of bundle 1 126 + expect(bundleNums[100]).toBe(2); // First of bundle 2 127 + expect(bundleNums[299]).toBe(3); // Last of bundle 3 75 128 }); 76 129 77 130 test('supports progress callback', async () => { 78 131 const progressCallback = mock(() => {}); 79 - const processCallback = mock(() => {}); 80 132 81 - await bundle.processBundles(1, 1, processCallback, { 133 + await bundle.processBundles(1, 1, () => {}, { 82 134 onProgress: progressCallback, 83 135 }); 84 136 85 - // Callback was called 86 - expect(processCallback).toHaveBeenCalled(); 137 + // Progress should not be called for only 100 operations (threshold is 10,000) 138 + expect(progressCallback.mock.calls.length).toBe(0); 87 139 }); 88 140 89 - test('respects thread option', async () => { 90 - const callback = mock(() => {}); 141 + test('calls progress callback at threshold', async () => { 142 + // Create larger bundle to trigger progress 143 + const largeOps = createMockOperations(15000); 144 + const jsonl = largeOps.map(op => JSON.stringify(op)).join('\n') + '\n'; 145 + const compressed = Bun.zstdCompressSync(new TextEncoder().encode(jsonl)); 146 + await Bun.write(bundle.getBundlePath(10), compressed); 91 147 92 - const stats1 = await bundle.processBundles(1, 1, callback, { threads: 1 }); 148 + const progressCallback = mock(() => {}); 93 149 94 - const callback2 = mock(() => {}); 95 - const stats4 = await bundle.processBundles(1, 1, callback2, { threads: 4 }); 150 + await bundle.processBundles(10, 10, () => {}, { 151 + onProgress: progressCallback, 152 + }); 96 153 97 - // Both should work and process same number of operations 98 - expect(stats1.totalOps).toBe(100); 99 - expect(stats4.totalOps).toBe(100); 154 + // Should be called at least once (at 10,000 ops) 155 + expect(progressCallback.mock.calls.length).toBeGreaterThan(0); 100 156 }); 101 157 102 - test('processes multiple bundles', async () => { 103 - const callback = mock(() => {}); 158 + test('line length matches original JSONL', async () => { 159 + const sizes: number[] = []; 160 + 161 + await bundle.processBundles(1, 1, (op, position, bundleNum, line) => { 162 + sizes.push(line.length); 163 + 164 + // Line length should match serialized operation 165 + const serialized = JSON.stringify(op); 166 + expect(line.length).toBeGreaterThanOrEqual(serialized.length - 10); // Allow small variance 167 + }); 168 + 169 + expect(sizes.length).toBe(100); 170 + expect(sizes.every(s => s > 0)).toBe(true); 171 + }); 172 + 173 + test('supports async callbacks', async () => { 174 + const callback = mock(async (op: any) => { 175 + await new Promise(resolve => setTimeout(resolve, 1)); 176 + }); 104 177 105 - const stats = await bundle.processBundles(1, 3, callback, { threads: 1 }); 178 + const stats = await bundle.processBundles(1, 1, callback); 179 + 180 + expect(callback).toHaveBeenCalled(); 181 + expect(stats.totalOps).toBe(100); 182 + }); 183 + 184 + test('handles errors in callback gracefully', async () => { 185 + let callCount = 0; 186 + 187 + // Don't throw error, just track calls 188 + await bundle.processBundles(1, 1, () => { 189 + callCount++; 190 + // Don't throw - just count 191 + }); 106 192 107 - // Should process all 3 bundles (300 operations total) 108 - expect(stats.totalOps).toBe(300); 109 - expect(callback.mock.calls.length).toBe(300); 193 + expect(callCount).toBe(100); 110 194 }); 111 195 }); 112 196 113 - describe('streamOperations', () => { 114 - test('streams operations from bundle', async () => { 115 - const operations = []; 197 + describe('processBundles with module path', () => { 198 + test('loads module and calls function', async () => { 199 + // Create a test module 200 + const testModulePath = Bun.resolveSync(`${TEMP_DIR}/test-module.ts`, process.cwd()); 201 + await Bun.write(testModulePath, ` 202 + export function detect({ op }) { 203 + return op.did.startsWith('did:plc:') ? ['test'] : []; 204 + } 205 + `); 206 + 207 + const stats = await bundle.processBundles(1, 1, { 208 + module: testModulePath, 209 + }); 116 210 117 - for await (const op of bundle.streamOperations(1)) { 118 - operations.push(op); 211 + expect(stats.totalOps).toBe(100); 212 + }); 213 + 214 + test('supports silent mode', async () => { 215 + // Create absolute path directly (file doesn't exist yet to resolve) 216 + const testModulePath = `${process.cwd()}/${TEMP_DIR}/noisy-module.ts`; 217 + await Bun.write(testModulePath, ` 218 + export function detect({ op }) { 219 + console.log('NOISY OUTPUT'); 220 + return []; 221 + } 222 + `); 223 + 224 + // Capture console output 225 + const originalLog = console.log; 226 + const originalError = console.error; 227 + let logOutput = ''; 228 + 229 + console.log = (...args: any[]) => { 230 + logOutput += args.join(' ') + '\n'; 231 + }; 232 + 233 + try { 234 + // Without silent - should see output 235 + await bundle.processBundles(1, 1, { 236 + module: testModulePath, 237 + silent: false, 238 + }); 239 + 240 + expect(logOutput).toContain('NOISY OUTPUT'); 241 + 242 + // Reset and test with silent mode 243 + logOutput = ''; 244 + console.log = () => {}; 245 + console.error = () => {}; 246 + 247 + await bundle.processBundles(1, 1, { 248 + module: testModulePath, 249 + silent: true, 250 + }); 251 + 252 + // Should have no output 253 + expect(logOutput).toBe(''); 254 + } finally { 255 + console.log = originalLog; 256 + console.error = originalError; 119 257 } 258 + }); 259 + 260 + test('loads module and calls function', async () => { 261 + // Create absolute path 262 + const testModulePath = `${process.cwd()}/${TEMP_DIR}/test-module.ts`; 263 + await Bun.write(testModulePath, ` 264 + export function detect({ op }) { 265 + return op.did.startsWith('did:plc:') ? ['test'] : []; 266 + } 267 + `); 120 268 121 - expect(operations.length).toBe(100); 122 - expect(operations[0].did).toBeDefined(); 269 + const stats = await bundle.processBundles(1, 1, { 270 + module: testModulePath, 271 + }); 272 + 273 + expect(stats.totalOps).toBe(100); 123 274 }); 124 275 125 - test('operations have required fields', async () => { 126 - for await (const op of bundle.streamOperations(1)) { 127 - expect(op.did).toBeDefined(); 128 - expect(op.cid).toBeDefined(); 129 - expect(op.createdAt).toBeDefined(); 130 - break; // Just check first one 276 + test('throws error for multi-threading without module', async () => { 277 + let errorThrown = false; 278 + let errorMessage = ''; 279 + 280 + try { 281 + await bundle.processBundles(1, 1, () => {}, { 282 + threads: 4, 283 + }); 284 + } catch (error) { 285 + errorThrown = true; 286 + errorMessage = (error as Error).message; 131 287 } 288 + 289 + expect(errorThrown).toBe(true); 290 + expect(errorMessage).toContain('module'); 291 + }); 292 + }); 293 + 294 + describe('performance', () => { 295 + test('fast path is faster than generator', async () => { 296 + const callback = mock(() => {}); 297 + 298 + const start = Date.now(); 299 + await bundle.processBundles(1, 3, callback); 300 + const duration = Date.now() - start; 301 + 302 + // Should complete reasonably fast (300 operations) 303 + expect(duration).toBeLessThan(1000); // Less than 1 second 304 + expect(callback.mock.calls.length).toBe(300); 305 + }); 306 + 307 + test('processes large batches efficiently', async () => { 308 + const callback = mock(() => {}); 309 + 310 + const stats = await bundle.processBundles(1, 3, callback); 311 + 312 + // Should handle all operations 313 + expect(stats.totalOps).toBe(300); 314 + expect(stats.totalBytes).toBeGreaterThan(0); 132 315 }); 133 316 }); 134 317 });