[DEPRECATED] Go implementation of plcbundle
at main 3.2 kB view raw
// Label detector for plcbundle archives. Requires the Bun runtime
// (uses Bun.file and Bun.zstdDecompressSync).
//
// Usage: bun <this-script> [bundleDir] [startBundle] [endBundle]
//
// CSV rows are written to stdout; progress, errors and the final summary are
// written to stderr so the CSV stream stays clean.

/**
 * User's detect function: decides which labels apply to one operation.
 *
 * @param {{ op: object }} input - Wrapper holding the parsed JSON operation.
 *   Only `op.did` is read here; it is assumed to be a string (TODO confirm
 *   against the bundle schema). A missing `did` throws, and the caller
 *   reports that operation as an error.
 * @returns {string[]} Labels for the operation; empty when nothing matched.
 */
function detect({ op }) {
  const labels = [];

  if (op.did.startsWith('did:plc:aa')) {
    labels.push('test');
  }

  // Nothing may be printed to stdout from here: stdout carries the CSV rows.
  return labels;
}

/**
 * Parses a bundle-number CLI argument.
 *
 * @param {string | undefined} raw - Raw argument text.
 * @param {number} fallback - Value used when `raw` is missing, not numeric,
 *   or not a positive integer.
 * @returns {number} The parsed bundle number, or `fallback`.
 */
function parseBundleArg(raw, fallback) {
  const value = Number.parseInt(raw, 10);
  return Number.isInteger(value) && value > 0 ? value : fallback;
}

/**
 * Formats `part / whole` as a percentage with two decimals.
 * Returns '0.00' when `whole` is zero so the summary never prints NaN.
 */
function percent(part, whole) {
  return whole > 0 ? ((part / whole) * 100).toFixed(2) : '0.00';
}

/** Rate of `amount` per second; 0 when no measurable time has elapsed. */
function perSecond(amount, seconds) {
  return seconds > 0 ? amount / seconds : 0;
}

/**
 * Runs `detect` over every non-blank line of one decompressed bundle.
 *
 * @param {string} text - Bundle content, one JSON operation per line.
 * @param {number} bundleNum - Bundle number written into each CSV row.
 * @param {{ totalOps: number, matchCount: number, totalBytes: number,
 *   matchedBytes: number }} stats - Running totals; updated in place.
 * @returns {string[]} CSV rows (bundle,position,cid,size,confidence,labels)
 *   for the operations that received at least one label.
 */
function scanBundleText(text, bundleNum, stats) {
  const rows = [];
  // Positions index the non-blank lines only.
  const lines = text.split('\n').filter((line) => line.trim());

  for (let position = 0; position < lines.length; position++) {
    const line = lines[position];

    // Sizes are reported as bytes/MB, so measure UTF-8 bytes rather than
    // UTF-16 code units (they differ for non-ASCII content).
    const opSize = Buffer.byteLength(line, 'utf8');
    stats.totalOps++;
    stats.totalBytes += opSize;

    try {
      const op = JSON.parse(line);
      const labels = detect({ op });

      if (labels && labels.length > 0) {
        // Extract last 4 chars of CID. The row is built before the counters
        // are touched so a failure here cannot leave a counted match that
        // has no row.
        const cidShort = op.cid.slice(-4);
        const row = `${bundleNum},${position},${cidShort},${opSize},0.95,${labels.join(';')}`;

        stats.matchCount++;
        stats.matchedBytes += opSize;
        rows.push(row);
      }
    } catch (err) {
      // One bad operation must not abort the rest of the bundle.
      console.error(`Error parsing operation: ${err.message}`);
    }
  }

  return rows;
}

// ==========================================
// Pure Bun bundle processor with native zstd
// ==========================================

const BUNDLE_DIR = process.argv[2] || './';
const START_BUNDLE = parseBundleArg(process.argv[3], 1);
const END_BUNDLE = parseBundleArg(process.argv[4], 100);

console.error(`Processing bundles ${START_BUNDLE}-${END_BUNDLE} from ${BUNDLE_DIR}`);
console.error('');

// CSV header
console.log('bundle,position,cid,size,confidence,labels');

const stats = { totalOps: 0, matchCount: 0, totalBytes: 0, matchedBytes: 0 };

const startTime = Date.now();

for (let bundleNum = START_BUNDLE; bundleNum <= END_BUNDLE; bundleNum++) {
  const bundleFile = `${BUNDLE_DIR}/${String(bundleNum).padStart(6, '0')}.jsonl.zst`;

  try {
    // Read the compressed bundle, decompress it with Bun's native zstd,
    // then decode the bytes to text.
    const compressed = await Bun.file(bundleFile).arrayBuffer();
    const decompressed = Bun.zstdDecompressSync(compressed);
    const text = new TextDecoder().decode(decompressed);

    for (const row of scanBundleText(text, bundleNum, stats)) {
      console.log(row);
    }

    // Progress
    if (bundleNum % 10 === 0) {
      const elapsedSoFar = (Date.now() - startTime) / 1000;
      const opsPerSec = perSecond(stats.totalOps, elapsedSoFar).toFixed(0);
      console.error(`Processed ${bundleNum}/${END_BUNDLE} bundles | ${stats.totalOps} ops | ${opsPerSec} ops/sec\r`);
    }
  } catch (err) {
    // A missing or corrupt bundle is reported and skipped; the run continues.
    console.error(`\nError loading bundle ${bundleNum}: ${err.message}`);
  }
}

const elapsed = (Date.now() - startTime) / 1000;

// Stats
console.error('\n');
console.error('✓ Detection complete');
console.error(` Total operations: ${stats.totalOps}`);
console.error(` Matches found: ${stats.matchCount} (${percent(stats.matchCount, stats.totalOps)}%)`);
console.error(` Total size: ${(stats.totalBytes / 1e6).toFixed(1)} MB`);
console.error(` Matched size: ${(stats.matchedBytes / 1e6).toFixed(1)} MB (${percent(stats.matchedBytes, stats.totalBytes)}%)`);
console.error('');
console.error(` Time elapsed: ${elapsed.toFixed(2)}s`);
console.error(` Throughput: ${perSecond(stats.totalOps, elapsed).toFixed(0)} ops/sec`);
console.error(` Speed: ${(perSecond(stats.totalBytes, elapsed) / 1e6).toFixed(1)} MB/sec`);