[DEPRECATED] Go implementation of plcbundle
1#!/usr/bin/env bun
2
/**
 * User-supplied detector: decides which labels apply to one operation.
 *
 * This function must not write to stdout: the surrounding script emits its
 * CSV rows on stdout, so any stray output here would corrupt that stream.
 *
 * @param {{ op: object }} context - Wrapper holding the parsed operation.
 *   Only `op.did` is read here.
 * @returns {string[]} Labels matched by the operation; empty when none match.
 * @throws {TypeError} If `op.did` is not a string.
 */
function detect({ op }) {
  const labels = [];

  if (op.did.startsWith('did:plc:aa')) {
    labels.push('test');
  }

  return labels;
}
15
// ==========================================
// Pure Bun bundle processor with native zstd
// ==========================================
//
// Usage: <script> [bundleDir] [startBundle] [endBundle]
// CSV rows for matching operations go to stdout; progress, errors and the
// final statistics go to stderr so the CSV stream stays clean.

/**
 * Parse a bundle-number CLI argument.
 *
 * @param {string | undefined} raw - Raw argument text (may be missing).
 * @param {number} fallback - Value used when `raw` is absent or unusable.
 * @returns {number} The parsed positive integer, otherwise `fallback`.
 */
function parseBundleArg(raw, fallback) {
  const parsed = Number.parseInt(raw, 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
}

/**
 * Format `part / whole` as a percentage with two decimals.
 *
 * @param {number} part - Numerator.
 * @param {number} whole - Denominator.
 * @returns {string} Percentage text; "0.00" when `whole` is not positive,
 *   instead of the "NaN" a bare division would print.
 */
function formatPercent(part, whole) {
  const ratio = whole > 0 ? part / whole : 0;
  return (ratio * 100).toFixed(2);
}

/**
 * Compute a per-second rate.
 *
 * @param {number} amount - Quantity accumulated so far.
 * @param {number} seconds - Elapsed time in seconds.
 * @returns {number} `amount / seconds`, or 0 when no time has elapsed.
 */
function perSecond(amount, seconds) {
  return seconds > 0 ? amount / seconds : 0;
}

const BUNDLE_DIR = process.argv[2] || './';
const START_BUNDLE = parseBundleArg(process.argv[3], 1);
const END_BUNDLE = parseBundleArg(process.argv[4], 100);

console.error(`Processing bundles ${START_BUNDLE}-${END_BUNDLE} from ${BUNDLE_DIR}`);
console.error('');

// CSV header
console.log('bundle,position,cid,size,confidence,labels');

let totalOps = 0;
let matchCount = 0;
let totalBytes = 0;
let matchedBytes = 0;

const startTime = Date.now();
// One decoder is enough for every bundle.
const decoder = new TextDecoder();

for (let bundleNum = START_BUNDLE; bundleNum <= END_BUNDLE; bundleNum++) {
  const bundleFile = `${BUNDLE_DIR}/${bundleNum.toString().padStart(6, '0')}.jsonl.zst`;

  try {
    // Read the compressed bundle and inflate it with Bun's built-in zstd.
    const compressed = await Bun.file(bundleFile).arrayBuffer();
    const decompressed = Bun.zstdDecompressSync(compressed);
    const text = decoder.decode(decompressed);

    // One JSON document per line; blank lines are dropped, so `position`
    // below is the index among the non-blank lines.
    const lines = text.split('\n').filter((line) => line.trim());

    for (let position = 0; position < lines.length; position++) {
      const line = lines[position];

      totalOps++;
      // Sizes are reported as bytes/MB, so measure the UTF-8 encoding
      // rather than the UTF-16 code-unit count of `line.length`.
      const opSize = Buffer.byteLength(line, 'utf8');
      totalBytes += opSize;

      try {
        const op = JSON.parse(line);
        const labels = detect({ op });

        if (!labels || labels.length === 0) continue;

        // Last 4 chars of the CID. Computed before touching the counters so
        // an operation without a usable `cid` cannot skew the match totals.
        const cidShort = op.cid.slice(-4);

        matchCount++;
        matchedBytes += opSize;

        console.log(
          `${bundleNum},${position},${cidShort},${opSize},0.95,${labels.join(';')}`
        );
      } catch (err) {
        // Covers JSON parse failures as well as errors thrown by detect().
        console.error(
          `Error processing operation (bundle ${bundleNum}, position ${position}): ${err.message}`
        );
      }
    }

    // Progress
    if (bundleNum % 10 === 0) {
      const elapsedSoFar = (Date.now() - startTime) / 1000;
      const opsPerSec = perSecond(totalOps, elapsedSoFar).toFixed(0);
      console.error(`Processed ${bundleNum}/${END_BUNDLE} bundles | ${totalOps} ops | ${opsPerSec} ops/sec\r`);
    }
  } catch (err) {
    // A missing or corrupt bundle is reported and skipped, not fatal.
    console.error(`\nError loading bundle ${bundleNum}: ${err.message}`);
  }
}

const elapsed = (Date.now() - startTime) / 1000;

// Stats
console.error('\n');
console.error('✓ Detection complete');
console.error(` Total operations: ${totalOps}`);
console.error(` Matches found: ${matchCount} (${formatPercent(matchCount, totalOps)}%)`);
console.error(` Total size: ${(totalBytes / 1e6).toFixed(1)} MB`);
console.error(` Matched size: ${(matchedBytes / 1e6).toFixed(1)} MB (${formatPercent(matchedBytes, totalBytes)}%)`);
console.error('');
console.error(` Time elapsed: ${elapsed.toFixed(2)}s`);
console.error(` Throughput: ${perSecond(totalOps, elapsed).toFixed(0)} ops/sec`);
console.error(` Speed: ${(perSecond(totalBytes, elapsed) / 1e6).toFixed(1)} MB/sec`);