+2
-1
examples/analyze.ts
+2
-1
examples/analyze.ts
···
19
19
withPds: 0,
20
20
};
21
21
22
-
await bundle.processBundles(1, 10, (op) => {
22
+
await bundle.processBundles(1, 10, (op, position, bundleNum, line) => {
23
23
stats.totalOps++;
24
24
stats.uniqueDids.add(op.did);
25
25
···
37
37
stats.withPds++;
38
38
}
39
39
});
40
+
40
41
41
42
console.log('📊 Analysis Results\n');
42
43
console.log(`Total operations: ${stats.totalOps.toLocaleString()}`);
+11
examples/detect-orig.js
+11
examples/detect-orig.js
+2
-2
examples/detect.ts
+2
-2
examples/detect.ts
+31
examples/info.ts
+31
examples/info.ts
···
1
+
/**
2
+
* Example: Get repository information
3
+
*
4
+
* Usage:
5
+
* bun examples/info.ts
6
+
*/
7
+
8
+
import { PLCBundle } from '../src';
9
+
10
+
const bundle = new PLCBundle('./data/bundles');
11
+
12
+
const stats = await bundle.getStats();
13
+
14
+
console.log('📦 Repository Information\n');
15
+
console.log(`Version: ${stats.version}`);
16
+
console.log(`Last bundle: ${stats.lastBundle}`);
17
+
console.log(`Total bundles: ${stats.totalBundles}`);
18
+
console.log(`Total size: ${(stats.totalSize / 1e9).toFixed(2)} GB`);
19
+
console.log(`Updated: ${stats.updatedAt}`);
20
+
21
+
console.log('\n📊 Sample Bundle Info\n');
22
+
23
+
const metadata = await bundle.getMetadata(1);
24
+
if (metadata) {
25
+
console.log(`Bundle #${metadata.bundle_number}`);
26
+
console.log(` Operations: ${metadata.operation_count.toLocaleString()}`);
27
+
console.log(` Unique DIDs: ${metadata.did_count.toLocaleString()}`);
28
+
console.log(` Time range: ${metadata.start_time} → ${metadata.end_time}`);
29
+
console.log(` Size: ${(metadata.compressed_size / 1e6).toFixed(2)} MB (compressed)`);
30
+
console.log(` Size: ${(metadata.uncompressed_size / 1e6).toFixed(2)} MB (uncompressed)`);
31
+
}
-32
examples/library-use.js
-32
examples/library-use.js
···
1
-
import { PLCBundle } from './src';
2
-
3
-
// Create bundle reader
4
-
const plcbundle = new PLCBundle('./bundles');
5
-
6
-
// Get stats
7
-
const stats = await plcbundle.getStats();
8
-
console.log(stats);
9
-
10
-
// Stream operations
11
-
for await (const op of plcbundle.streamOperations(1)) {
12
-
console.log(op.did);
13
-
}
14
-
15
-
await bundle.processBundles(1, 100, (op, pos, num) => {
16
-
// Your logic
17
-
}, {
18
-
threads: 4,
19
-
onProgress: (stats) => {
20
-
console.log(`Processed ${stats.totalOps} ops`);
21
-
}
22
-
});
23
-
24
-
// Clone with progress
25
-
await bundle.clone('https://plcbundle.atscan.net', {
26
-
threads: 8,
27
-
bundles: '1-100',
28
-
verify: true,
29
-
onProgress: (stats) => {
30
-
console.log(`Downloaded ${stats.downloadedBundles}/${stats.totalBundles}`);
31
-
}
32
-
});
+1
-1
examples/parallel.ts
+1
-1
examples/parallel.ts
+28
examples/processor.ts
+28
examples/processor.ts
···
1
+
/**
2
+
* Example process function for general operation processing
3
+
*
4
+
* Usage:
5
+
* plcbundle-bun process ./examples/processor.ts
6
+
*/
7
+
8
+
export function process({ op, position, bundle, line }: {
9
+
op: any;
10
+
position: number;
11
+
bundle: number;
12
+
line: string;
13
+
}) {
14
+
// Example: Count operations by year
15
+
const year = op.createdAt.substring(0, 4);
16
+
17
+
// Example: Log specific operations
18
+
if (position % 1000 === 0) {
19
+
console.log(`Bundle ${bundle}, position ${position}: ${op.did}`);
20
+
}
21
+
22
+
// Example: Custom logic
23
+
if (op.operation?.alsoKnownAs?.length > 0) {
24
+
// Do something with operations that have handles
25
+
}
26
+
27
+
// No need to return anything
28
+
}
+5
-3
src/cli.ts
+5
-3
src/cli.ts
···
2
2
3
3
import { clone } from './cmds/clone';
4
4
import { detect } from './cmds/detect';
5
+
import { process as processCmd } from './cmds/process';
5
6
import { info } from './cmds/info';
6
7
import { verify } from './cmds/verify';
7
8
import { exportCmd } from './cmds/export';
···
9
10
const commands = {
10
11
clone,
11
12
detect,
13
+
process: processCmd,
12
14
info,
13
15
verify,
14
16
export: exportCmd,
···
23
25
COMMANDS:
24
26
clone Clone bundles from a remote repository
25
27
detect Detect and filter operations using a custom function
28
+
process Process operations with a custom function
26
29
info Show index or bundle information
27
30
verify Verify bundle integrity
28
31
export Export operations from bundle
···
32
35
33
36
EXAMPLES:
34
37
bun cli clone --remote https://plcbundle.atscan.net
38
+
bun cli detect ./examples/detect.ts --bundles 1-100
39
+
bun cli process ./my-processor.ts --threads 4
35
40
bun cli info --dir ./bundles
36
-
bun cli detect --detect ./examples/detect.ts
37
-
bun cli detect --detect ./examples/detect.ts --bundles 1-100
38
-
bun cli detect --detect ./examples/detect.ts --bundles 42 --threads 4
39
41
bun cli verify --bundle 42
40
42
bun cli export --bundle 1 > ops.jsonl
41
43
`);
+233
src/cmds/common.ts
+233
src/cmds/common.ts
···
1
+
import { PLCBundle } from '../plcbundle';
2
+
import type { ProcessStats } from '../types';
3
+
import { parseArgs } from 'util';
4
+
5
+
export interface ProcessingOptions {
6
+
dir: string;
7
+
start: number;
8
+
end: number;
9
+
modulePath: string;
10
+
threads: number;
11
+
silent: boolean;
12
+
flush?: boolean;
13
+
mode: 'detect' | 'process';
14
+
onMatch?: (match: any, matchCount: number, matchedBytes: number) => void;
15
+
}
16
+
17
+
/**
18
+
* Common processing logic for both detect and process commands
19
+
*/
20
+
export async function processOperations(options: ProcessingOptions) {
21
+
const { dir, start, end, modulePath, threads, silent, flush, mode, onMatch } = options;
22
+
23
+
const bundle = new PLCBundle(dir);
24
+
25
+
// Save original console
26
+
const originalConsole = {
27
+
log: console.log,
28
+
error: console.error,
29
+
warn: console.warn,
30
+
info: console.info,
31
+
debug: console.debug,
32
+
};
33
+
34
+
// Override console if silent
35
+
if (silent) {
36
+
console.log = () => {};
37
+
console.error = () => {};
38
+
console.warn = () => {};
39
+
console.info = () => {};
40
+
console.debug = () => {};
41
+
}
42
+
43
+
try {
44
+
originalConsole.error(`Processing bundles ${start}-${end} from ${dir}${threads > 1 ? ` (${threads} threads)` : ''}${silent ? ' (silent)' : ''}\n`);
45
+
46
+
if (mode === 'detect') {
47
+
originalConsole.log('bundle,position,cid,size,confidence,labels');
48
+
}
49
+
50
+
const startTime = Date.now();
51
+
let matchCount = 0;
52
+
let matchedBytes = 0;
53
+
54
+
if (threads > 1) {
55
+
// Multi-threaded mode
56
+
const result: any = await bundle.processBundles(start, end, {
57
+
module: modulePath,
58
+
threads,
59
+
silent,
60
+
flush,
61
+
onProgress: (progressStats: ProcessStats) => {
62
+
const elapsed = (Date.now() - startTime) / 1000;
63
+
const opsPerSec = (progressStats.totalOps / elapsed).toFixed(0);
64
+
const mbPerSec = (progressStats.totalBytes / elapsed / 1e6).toFixed(1);
65
+
originalConsole.error(`Processed ${progressStats.totalOps} ops | ${opsPerSec} ops/sec | ${mbPerSec} MB/s\r`);
66
+
},
67
+
onMatch: flush && onMatch ? (match) => {
68
+
matchCount++;
69
+
matchedBytes += match.size;
70
+
onMatch(match, matchCount, matchedBytes);
71
+
} : undefined,
72
+
});
73
+
74
+
// Output buffered matches (if not flushed)
75
+
if (!flush && result.matches && onMatch) {
76
+
for (const match of result.matches) {
77
+
matchCount++;
78
+
matchedBytes += match.size;
79
+
onMatch(match, matchCount, matchedBytes);
80
+
}
81
+
}
82
+
83
+
const elapsed = (Date.now() - startTime) / 1000;
84
+
const opsPerSec = (result.totalOps / elapsed).toFixed(0);
85
+
const mbPerSec = (result.totalBytes / elapsed / 1e6).toFixed(1);
86
+
87
+
originalConsole.error('\n');
88
+
originalConsole.error(`✓ ${mode === 'detect' ? 'Detection' : 'Processing'} complete`);
89
+
originalConsole.error(` Total operations: ${result.totalOps.toLocaleString()}`);
90
+
91
+
if (mode === 'detect') {
92
+
originalConsole.error(` Matches found: ${matchCount.toLocaleString()} (${(matchCount/result.totalOps*100).toFixed(2)}%)`);
93
+
originalConsole.error(` Matched size: ${(matchedBytes / 1e6).toFixed(1)} MB (${(matchedBytes/result.totalBytes*100).toFixed(2)}%)`);
94
+
}
95
+
96
+
originalConsole.error(` Total size: ${(result.totalBytes / 1e6).toFixed(1)} MB`);
97
+
originalConsole.error('');
98
+
originalConsole.error(` Time elapsed: ${elapsed.toFixed(2)}s`);
99
+
originalConsole.error(` Throughput: ${opsPerSec} ops/sec | ${mbPerSec} MB/s`);
100
+
originalConsole.error(` Threads: ${threads}`);
101
+
} else {
102
+
// Single-threaded mode
103
+
const mod = await import(modulePath);
104
+
const userFn = mode === 'detect' ? (mod.detect || mod.default) : (mod.process || mod.default);
105
+
106
+
const finalStats = await bundle.processBundles(
107
+
start,
108
+
end,
109
+
(op, position, bundleNum, line) => {
110
+
if (mode === 'detect') {
111
+
const labels = userFn({ op });
112
+
113
+
if (labels && labels.length > 0) {
114
+
matchCount++;
115
+
matchedBytes += line.length;
116
+
if (onMatch) {
117
+
onMatch({
118
+
bundle: bundleNum,
119
+
position,
120
+
cid: op.cid.slice(-4),
121
+
size: line.length,
122
+
labels
123
+
}, matchCount, matchedBytes);
124
+
}
125
+
}
126
+
} else {
127
+
// Process mode - just call function
128
+
userFn({ op, position, bundle: bundleNum, line });
129
+
}
130
+
},
131
+
{
132
+
onProgress: (progressStats: ProcessStats) => {
133
+
const elapsed = (Date.now() - startTime) / 1000;
134
+
const opsPerSec = (progressStats.totalOps / elapsed).toFixed(0);
135
+
const mbPerSec = (progressStats.totalBytes / elapsed / 1e6).toFixed(1);
136
+
originalConsole.error(`Processed ${progressStats.totalOps} ops | ${opsPerSec} ops/sec | ${mbPerSec} MB/s\r`);
137
+
},
138
+
}
139
+
);
140
+
141
+
const elapsed = (Date.now() - startTime) / 1000;
142
+
const opsPerSec = (finalStats.totalOps / elapsed).toFixed(0);
143
+
const mbPerSec = (finalStats.totalBytes / elapsed / 1e6).toFixed(1);
144
+
145
+
originalConsole.error('\n');
146
+
originalConsole.error(`✓ ${mode === 'detect' ? 'Detection' : 'Processing'} complete`);
147
+
originalConsole.error(` Total operations: ${finalStats.totalOps.toLocaleString()}`);
148
+
149
+
if (mode === 'detect') {
150
+
originalConsole.error(` Matches found: ${matchCount.toLocaleString()} (${(matchCount/finalStats.totalOps*100).toFixed(2)}%)`);
151
+
originalConsole.error(` Matched size: ${(matchedBytes / 1e6).toFixed(1)} MB (${(matchedBytes/finalStats.totalBytes*100).toFixed(2)}%)`);
152
+
}
153
+
154
+
originalConsole.error(` Total size: ${(finalStats.totalBytes / 1e6).toFixed(1)} MB`);
155
+
originalConsole.error('');
156
+
originalConsole.error(` Time elapsed: ${elapsed.toFixed(2)}s`);
157
+
originalConsole.error(` Throughput: ${opsPerSec} ops/sec | ${mbPerSec} MB/s`);
158
+
if (threads > 1) {
159
+
originalConsole.error(` Threads: ${threads}`);
160
+
}
161
+
}
162
+
} finally {
163
+
// Restore console
164
+
console.log = originalConsole.log;
165
+
console.error = originalConsole.error;
166
+
console.warn = originalConsole.warn;
167
+
console.info = originalConsole.info;
168
+
console.debug = originalConsole.debug;
169
+
}
170
+
}
171
+
172
+
/**
173
+
* Common argument parsing for process/detect commands
174
+
*/
175
+
export function parseProcessArgs(args: string[]) {
176
+
const { values, positionals } = parseArgs({
177
+
args,
178
+
options: {
179
+
dir: { type: 'string', default: './' },
180
+
bundles: { type: 'string' },
181
+
threads: { type: 'string', default: '1' },
182
+
silent: { type: 'boolean', default: false },
183
+
s: { type: 'boolean', default: false },
184
+
flush: { type: 'boolean', default: false },
185
+
},
186
+
strict: false,
187
+
allowPositionals: true,
188
+
});
189
+
190
+
return { values, positionals };
191
+
}
192
+
193
+
/**
194
+
* Parse bundle selection from values
195
+
*/
196
+
export async function parseBundleSelection(
197
+
values: any,
198
+
bundle: PLCBundle
199
+
): Promise<{ start: number; end: number }> {
200
+
const stats = await bundle.getStats();
201
+
202
+
let start: number, end: number;
203
+
204
+
if (values.bundles && typeof values.bundles === 'string') {
205
+
const bundleSpec = values.bundles;
206
+
207
+
if (bundleSpec.includes('-')) {
208
+
const [startStr, endStr] = bundleSpec.split('-');
209
+
start = parseInt(startStr.trim());
210
+
end = parseInt(endStr.trim());
211
+
212
+
if (isNaN(start) || isNaN(end)) {
213
+
throw new Error(`Invalid bundle range: ${bundleSpec}`);
214
+
}
215
+
} else {
216
+
start = parseInt(bundleSpec);
217
+
end = start;
218
+
219
+
if (isNaN(start)) {
220
+
throw new Error(`Invalid bundle number: ${bundleSpec}`);
221
+
}
222
+
}
223
+
} else {
224
+
start = 1;
225
+
end = stats.lastBundle;
226
+
}
227
+
228
+
if (start < 1 || end > stats.lastBundle || start > end) {
229
+
throw new Error(`Invalid bundle range ${start}-${end} (available: 1-${stats.lastBundle})`);
230
+
}
231
+
232
+
return { start, end };
233
+
}
+34
-111
src/cmds/detect.ts
+34
-111
src/cmds/detect.ts
···
1
-
import { parseArgs } from 'util';
1
+
import { parseProcessArgs, parseBundleSelection, processOperations } from './common';
2
2
import { PLCBundle } from '../plcbundle';
3
-
import type { ProcessStats } from '../types';
4
3
5
4
export async function detect(args: string[]) {
6
5
if (args.includes('-h') || args.includes('--help')) {
7
6
console.log(`
8
7
detect - Detect and filter operations using a custom function
9
8
9
+
USAGE:
10
+
plcbundle-bun detect <module> [options]
11
+
12
+
ARGUMENTS:
13
+
<module> Path to detect function module (required)
14
+
10
15
OPTIONS:
11
16
--dir <path> Bundle directory (default: ./)
12
17
--bundles <spec> Bundle selection: number (42) or range (1-50)
13
-
If not specified, processes all available bundles
14
-
--detect <path> Path to detect function module (required)
15
18
--threads <num> Number of worker threads (default: 1)
19
+
--flush Output matches immediately (unsorted)
20
+
-s, --silent Suppress all console output from detect script
16
21
17
22
EXAMPLES:
18
-
bun cli detect --detect ./detect.ts # All bundles
19
-
bun cli detect --detect ./detect.ts --bundles 42 # Single bundle
20
-
bun cli detect --detect ./detect.ts --bundles 1-50 # Range
21
-
bun cli detect --detect ./detect.ts --threads 4 # Multi-threaded
23
+
plcbundle-bun detect ./detect.ts
24
+
plcbundle-bun detect ./detect.ts --bundles 1-50 --threads 4
25
+
plcbundle-bun detect ./detect.ts --flush --silent
22
26
`);
23
27
return;
24
28
}
25
29
26
-
const { values } = parseArgs({
27
-
args,
28
-
options: {
29
-
dir: { type: 'string', default: './' },
30
-
bundles: { type: 'string' },
31
-
detect: { type: 'string' },
32
-
threads: { type: 'string', default: '1' },
33
-
},
34
-
strict: false,
35
-
});
30
+
const { values, positionals } = parseProcessArgs(args);
31
+
const modulePath = positionals[0];
32
+
33
+
if (!modulePath) {
34
+
console.error('Error: module path is required');
35
+
console.error('Usage: plcbundle-bun detect <module> [options]');
36
+
process.exit(1);
37
+
}
36
38
37
39
const dir = (values.dir as string) || './';
38
40
const threads = parseInt((values.threads as string) || '1');
41
+
const silent = Boolean(values.silent || values.s);
42
+
const flush = Boolean(values.flush);
39
43
40
-
if (!values.detect || typeof values.detect !== 'string') {
41
-
console.error('Error: --detect is required');
42
-
process.exit(1);
43
-
}
44
-
45
-
// Load bundle instance to get index
46
44
const bundle = new PLCBundle(dir);
47
-
const stats = await bundle.getStats();
45
+
const { start, end } = await parseBundleSelection(values, bundle);
46
+
const resolvedPath = Bun.resolveSync(modulePath, process.cwd());
48
47
49
-
// Parse bundle selection
50
-
let start: number, end: number;
51
-
52
-
if (values.bundles && typeof values.bundles === 'string') {
53
-
const bundleSpec = values.bundles;
54
-
55
-
if (bundleSpec.includes('-')) {
56
-
const [startStr, endStr] = bundleSpec.split('-');
57
-
start = parseInt(startStr.trim());
58
-
end = parseInt(endStr.trim());
59
-
60
-
if (isNaN(start) || isNaN(end)) {
61
-
console.error(`Error: Invalid bundle range: ${bundleSpec}`);
62
-
process.exit(1);
63
-
}
64
-
} else {
65
-
start = parseInt(bundleSpec);
66
-
end = start;
67
-
68
-
if (isNaN(start)) {
69
-
console.error(`Error: Invalid bundle number: ${bundleSpec}`);
70
-
process.exit(1);
71
-
}
72
-
}
73
-
} else {
74
-
start = 1;
75
-
end = stats.lastBundle;
76
-
}
77
-
78
-
// Validate range
79
-
if (start < 1 || end > stats.lastBundle || start > end) {
80
-
console.error(`Error: Invalid bundle range ${start}-${end} (available: 1-${stats.lastBundle})`);
81
-
process.exit(1);
82
-
}
83
-
84
-
// Resolve and load detect function
85
-
const detectPath = Bun.resolveSync(values.detect, process.cwd());
86
-
const mod = await import(detectPath);
87
-
const detectFn = mod.detect || mod.default;
88
-
89
-
console.error(`Processing bundles ${start}-${end} from ${dir}${threads > 1 ? ` (${threads} threads)` : ''}\n`);
90
-
console.log('bundle,position,cid,size,confidence,labels');
91
-
92
-
let matchCount = 0;
93
-
let matchedBytes = 0;
94
-
const startTime = Date.now();
95
-
96
-
// Process bundles with progress
97
-
const finalStats = await bundle.processBundles(
48
+
await processOperations({
49
+
dir,
98
50
start,
99
51
end,
100
-
(op, position, bundleNum) => {
101
-
const opSize = JSON.stringify(op).length;
102
-
const labels = detectFn({ op });
103
-
104
-
if (labels && labels.length > 0) {
105
-
matchCount++;
106
-
matchedBytes += opSize;
107
-
const cidShort = op.cid.slice(-4);
108
-
console.log(`${bundleNum},${position},${cidShort},${opSize},0.95,${labels.join(';')}`);
109
-
}
52
+
modulePath: resolvedPath,
53
+
threads,
54
+
silent,
55
+
flush,
56
+
mode: 'detect',
57
+
onMatch: (match) => {
58
+
console.log(`${match.bundle},${match.position},${match.cid},${match.size},0.95,${match.labels.join(';')}`);
110
59
},
111
-
{
112
-
threads,
113
-
onProgress: (progressStats: ProcessStats) => {
114
-
const elapsed = (Date.now() - startTime) / 1000;
115
-
const opsPerSec = (progressStats.totalOps / elapsed).toFixed(0);
116
-
const mbPerSec = (progressStats.totalBytes / elapsed / 1e6).toFixed(1);
117
-
console.error(`Processed ${progressStats.totalOps} ops | ${opsPerSec} ops/sec | ${mbPerSec} MB/s\r`);
118
-
},
119
-
}
120
-
);
121
-
122
-
const elapsed = (Date.now() - startTime) / 1000;
123
-
const opsPerSec = (finalStats.totalOps / elapsed).toFixed(0);
124
-
const mbPerSec = (finalStats.totalBytes / elapsed / 1e6).toFixed(1);
125
-
126
-
console.error('\n');
127
-
console.error('✓ Detection complete');
128
-
console.error(` Total operations: ${finalStats.totalOps.toLocaleString()}`);
129
-
console.error(` Matches found: ${matchCount.toLocaleString()} (${(matchCount/finalStats.totalOps*100).toFixed(2)}%)`);
130
-
console.error(` Total size: ${(finalStats.totalBytes / 1e6).toFixed(1)} MB`);
131
-
console.error(` Matched size: ${(matchedBytes / 1e6).toFixed(1)} MB (${(matchedBytes/finalStats.totalBytes*100).toFixed(2)}%)`);
132
-
console.error('');
133
-
console.error(` Time elapsed: ${elapsed.toFixed(2)}s`);
134
-
console.error(` Throughput: ${opsPerSec} ops/sec | ${mbPerSec} MB/s`);
135
-
if (threads > 1) {
136
-
console.error(` Threads: ${threads}`);
137
-
}
60
+
});
138
61
}
+59
src/cmds/process.ts
+59
src/cmds/process.ts
···
1
+
import { parseProcessArgs, parseBundleSelection, processOperations } from './common';
2
+
import { PLCBundle } from '../plcbundle';
3
+
4
+
export async function process(args: string[]) {
5
+
if (args.includes('-h') || args.includes('--help')) {
6
+
console.log(`
7
+
process - Process operations with a custom function
8
+
9
+
USAGE:
10
+
plcbundle-bun process <module> [options]
11
+
12
+
ARGUMENTS:
13
+
<module> Path to process function module (required)
14
+
15
+
OPTIONS:
16
+
--dir <path> Bundle directory (default: ./)
17
+
--bundles <spec> Bundle selection: number (42) or range (1-50)
18
+
--threads <num> Number of worker threads (default: 1)
19
+
-s, --silent Suppress all console output from process script
20
+
21
+
EXAMPLES:
22
+
plcbundle-bun process ./my-processor.ts
23
+
plcbundle-bun process ./my-processor.ts --bundles 1-50 --threads 4
24
+
25
+
PROCESS FUNCTION:
26
+
export function process({ op, position, bundle, line }) {
27
+
// Your logic here
28
+
}
29
+
`);
30
+
return;
31
+
}
32
+
33
+
const { values, positionals } = parseProcessArgs(args);
34
+
const modulePath = positionals[0];
35
+
36
+
if (!modulePath) {
37
+
console.error('Error: module path is required');
38
+
console.error('Usage: plcbundle-bun process <module> [options]');
39
+
process.exit(1);
40
+
}
41
+
42
+
const dir = (values.dir as string) || './';
43
+
const threads = parseInt((values.threads as string) || '1');
44
+
const silent = Boolean(values.silent || values.s);
45
+
46
+
const bundle = new PLCBundle(dir);
47
+
const { start, end } = await parseBundleSelection(values, bundle);
48
+
const resolvedPath = Bun.resolveSync(modulePath, process.cwd());
49
+
50
+
await processOperations({
51
+
dir,
52
+
start,
53
+
end,
54
+
modulePath: resolvedPath,
55
+
threads,
56
+
silent,
57
+
mode: 'process',
58
+
});
59
+
}
+180
-46
src/plcbundle.ts
+180
-46
src/plcbundle.ts
···
244
244
* }
245
245
* ```
246
246
*/
247
-
async *streamOperations(bundleNum: number): AsyncGenerator<Operation> {
247
+
async *streamOperations(bundleNum: number): AsyncGenerator<{ op: Operation; line: string }> {
248
248
const content = await this.readBundle(bundleNum);
249
249
const lines = content.split('\n');
250
250
251
251
for (const line of lines) {
252
252
if (line.trim()) {
253
-
yield JSON.parse(line);
253
+
yield { op: JSON.parse(line), line };
254
254
}
255
255
}
256
256
}
257
257
258
258
/**
259
-
* Process multiple bundles with a callback function.
259
+
* Process multiple bundles with a callback or module.
260
260
*
261
-
* Supports both single-threaded and multi-threaded processing via {@link ProcessOptions}.
262
-
* Operations are processed in chronological order.
261
+
* Two modes:
262
+
* 1. **Direct callback** (single-threaded): Pass callback function
263
+
* 2. **Module path** (multi-threaded): Pass module path in options
263
264
*
264
-
* @param start - First bundle number to process (inclusive)
265
-
* @param end - Last bundle number to process (inclusive)
266
-
* @param callback - Function called for each operation
267
-
* @param options - Processing options (threads, progress callback)
265
+
* @param start - First bundle number
266
+
* @param end - Last bundle number
267
+
* @param callbackOrOptions - Callback function OR options object with module path
268
+
* @param options - Processing options (only if callback is provided)
268
269
* @returns Promise resolving to processing statistics
269
270
*
270
-
* @example Single-threaded
271
+
* @example Single-threaded with callback
271
272
* ```ts
272
-
* await bundle.processBundles(1, 10, (op, pos, num) => {
273
-
* console.log(`Bundle ${num}, pos ${pos}: ${op.did}`);
273
+
* await bundle.processBundles(1, 10, (op, pos, num, line) => {
274
+
* console.log(op.did);
274
275
* });
275
276
* ```
276
277
*
277
-
* @example Multi-threaded with progress
278
+
* @example Multi-threaded with module
278
279
* ```ts
279
-
* await bundle.processBundles(1, 100, (op) => {
280
-
* // Process operation
281
-
* }, {
280
+
* await bundle.processBundles(1, 100, {
281
+
* module: './detect.ts',
282
282
* threads: 4,
283
-
* onProgress: (stats) => {
284
-
* console.log(`Processed ${stats.totalOps} operations`);
285
-
* }
283
+
* onProgress: (stats) => console.log(stats.totalOps)
286
284
* });
287
285
* ```
288
286
*/
289
287
async processBundles(
290
288
start: number,
291
289
end: number,
292
-
callback: ProcessCallback,
293
-
options: ProcessOptions = {}
294
-
): Promise<ProcessStats> {
295
-
const { threads = 1, onProgress } = options;
290
+
callbackOrOptions: ProcessCallback | ProcessOptions,
291
+
options?: ProcessOptions
292
+
): Promise<ProcessStats & { matches?: any[] }> {
293
+
let callback: ProcessCallback | undefined;
294
+
let processOptions: ProcessOptions;
296
295
297
-
if (threads > 1) {
298
-
return await this.processBundlesMultiThreaded(start, end, callback, threads, onProgress);
296
+
if (typeof callbackOrOptions === 'function') {
297
+
callback = callbackOrOptions;
298
+
processOptions = options || {};
299
299
} else {
300
-
return await this.processBundlesSingleThreaded(start, end, callback, onProgress);
300
+
processOptions = callbackOrOptions;
301
+
}
302
+
303
+
const { threads = 1, module, silent = false, flush = false, onProgress, onMatch } = processOptions;
304
+
305
+
// Validation: multi-threading requires module
306
+
if (threads > 1 && !module) {
307
+
throw new Error('Multi-threading requires module path. Use: processBundles(start, end, { module: "./detect.ts", threads: 4 })');
308
+
}
309
+
310
+
// Use workers for multi-threading with module
311
+
if (threads > 1 && module) {
312
+
return await this.processBundlesWorkers(start, end, module, threads, silent, flush, onProgress, onMatch);
301
313
}
314
+
315
+
// Load module if provided but single-threaded
316
+
if (module && !callback) {
317
+
const resolvedPath = Bun.resolveSync(module, process.cwd());
318
+
const mod = await import(resolvedPath);
319
+
const detectFn = mod.detect || mod.default;
320
+
321
+
callback = (op) => {
322
+
detectFn({ op });
323
+
};
324
+
}
325
+
326
+
if (!callback) {
327
+
throw new Error('Either callback function or module path must be provided');
328
+
}
329
+
330
+
// Single-threaded fast path
331
+
return await this.processBundlesFast(start, end, callback, onProgress);
302
332
}
303
333
304
334
/**
305
-
* Single-threaded processing
335
+
* Multi-threaded processing using workers (optimized)
306
336
*/
307
-
private async processBundlesSingleThreaded(
337
+
private async processBundlesWorkers(
338
+
start: number,
339
+
end: number,
340
+
modulePath: string,
341
+
threads: number,
342
+
silent: boolean,
343
+
flush: boolean,
344
+
onProgress?: (stats: ProcessStats) => void,
345
+
onMatch?: (match: any) => void
346
+
): Promise<ProcessStats & { matches?: any[] }> {
347
+
const totalBundles = end - start + 1;
348
+
const bundlesPerThread = Math.ceil(totalBundles / threads);
349
+
350
+
const workerPath = new URL('./worker.ts', import.meta.url).pathname;
351
+
const workers: Worker[] = [];
352
+
const workerStats: Array<{ totalOps: number; totalBytes: number }> = [];
353
+
354
+
let aggregatedOps = 0;
355
+
let aggregatedBytes = 0;
356
+
const allMatches: any[] = [];
357
+
358
+
const workerPromises = [];
359
+
360
+
for (let i = 0; i < threads; i++) {
361
+
const threadStart = start + i * bundlesPerThread;
362
+
const threadEnd = Math.min(threadStart + bundlesPerThread - 1, end);
363
+
364
+
if (threadStart > end) break;
365
+
366
+
const worker = new Worker(workerPath);
367
+
workers.push(worker);
368
+
369
+
workerStats[i] = { totalOps: 0, totalBytes: 0 };
370
+
371
+
const promise = new Promise<any>((resolve) => {
372
+
worker.onmessage = (event) => {
373
+
const msg = event.data;
374
+
375
+
if (msg.type === 'progress') {
376
+
const oldStats = workerStats[i];
377
+
aggregatedOps += msg.totalOps - oldStats.totalOps;
378
+
aggregatedBytes += msg.totalBytes - oldStats.totalBytes;
379
+
workerStats[i] = { totalOps: msg.totalOps, totalBytes: msg.totalBytes };
380
+
381
+
if (onProgress) {
382
+
onProgress({
383
+
totalOps: aggregatedOps,
384
+
matchCount: 0,
385
+
totalBytes: aggregatedBytes,
386
+
matchedBytes: 0,
387
+
});
388
+
}
389
+
} else if (msg.type === 'match') {
390
+
// Handle flushed match - call callback immediately
391
+
if (onMatch) {
392
+
onMatch(msg);
393
+
}
394
+
} else if (msg.type === 'result') {
395
+
resolve(msg);
396
+
}
397
+
};
398
+
});
399
+
400
+
workerPromises.push(promise);
401
+
402
+
worker.postMessage({
403
+
dir: this.dir,
404
+
start: threadStart,
405
+
end: threadEnd,
406
+
modulePath,
407
+
silent,
408
+
flush,
409
+
});
410
+
}
411
+
412
+
// Wait for all workers
413
+
const results = await Promise.all(workerPromises);
414
+
415
+
// Cleanup
416
+
workers.forEach(w => w.terminate());
417
+
418
+
// Aggregate results
419
+
let totalOps = 0;
420
+
let totalBytes = 0;
421
+
422
+
for (const result of results) {
423
+
totalOps += result.totalOps;
424
+
totalBytes += result.totalBytes;
425
+
if (!flush) {
426
+
allMatches.push(...result.matches);
427
+
}
428
+
}
429
+
430
+
// Sort matches if not flushed
431
+
if (!flush) {
432
+
allMatches.sort((a, b) => {
433
+
if (a.bundle !== b.bundle) return a.bundle - b.bundle;
434
+
return a.position - b.position;
435
+
});
436
+
}
437
+
438
+
return {
439
+
totalOps,
440
+
matchCount: 0,
441
+
totalBytes,
442
+
matchedBytes: 0,
443
+
matches: flush ? undefined : allMatches,
444
+
};
445
+
}
446
+
447
+
/**
448
+
* Fast single-threaded processing (optimized)
449
+
*/
450
+
private async processBundlesFast(
308
451
start: number,
309
452
end: number,
310
453
callback: ProcessCallback,
···
318
461
};
319
462
320
463
for (let bundleNum = start; bundleNum <= end; bundleNum++) {
321
-
let position = 0;
464
+
const content = await this.readBundle(bundleNum);
465
+
const lines = content.split('\n');
322
466
323
-
for await (const op of this.streamOperations(bundleNum)) {
467
+
for (let position = 0; position < lines.length; position++) {
468
+
const line = lines[position];
469
+
if (!line.trim()) continue;
470
+
324
471
stats.totalOps++;
325
-
stats.totalBytes += JSON.stringify(op).length;
472
+
stats.totalBytes += line.length;
326
473
327
-
await callback(op, position++, bundleNum);
474
+
const op = JSON.parse(line);
475
+
await callback(op, position, bundleNum, line);
328
476
329
477
if (onProgress && stats.totalOps % 10000 === 0) {
330
478
onProgress({ ...stats });
···
333
481
}
334
482
335
483
return stats;
336
-
}
337
-
338
-
/**
339
-
* Multi-threaded processing
340
-
*/
341
-
private async processBundlesMultiThreaded(
342
-
start: number,
343
-
end: number,
344
-
callback: ProcessCallback,
345
-
threads: number,
346
-
onProgress?: (stats: ProcessStats) => void
347
-
): Promise<ProcessStats> {
348
-
// Simplified implementation - full multi-threading requires callback serialization
349
-
return await this.processBundlesSingleThreaded(start, end, callback, onProgress);
350
484
}
351
485
352
486
/**
+95
-15
src/types.ts
+95
-15
src/types.ts
···
105
105
* @param op - The operation being processed
106
106
* @param position - Zero-based position of the operation within its bundle
107
107
* @param bundleNum - The bundle number being processed
108
-
*
109
-
* @example
110
-
* ```ts
111
-
* const callback: ProcessCallback = (op, position, bundleNum) => {
112
-
* if (op.did.startsWith('did:plc:test')) {
113
-
* console.log(`Found test DID at bundle ${bundleNum}, position ${position}`);
114
-
* }
115
-
* };
116
-
* ```
108
+
* @param line - The raw JSONL line (for getting size without re-serializing)
117
109
*/
118
110
export type ProcessCallback = (
119
111
op: Operation,
120
112
position: number,
121
-
bundleNum: number
113
+
bundleNum: number,
114
+
line: string
122
115
) => void | Promise<void>;
123
116
124
117
/**
···
141
134
}
142
135
143
136
/**
144
-
* Options for processing bundles.
137
+
* Unified options for processing bundles.
145
138
*
146
-
* Allows customization of parallel processing and progress reporting.
139
+
* Supports both callback functions (single-threaded) and module paths (multi-threaded).
147
140
*/
148
141
export interface ProcessOptions {
149
-
/** Number of worker threads to use for parallel processing (default: 1) */
142
+
/**
143
+
* Number of worker threads (default: 1).
144
+
* If > 1, requires `module` instead of passing callback directly.
145
+
*/
150
146
threads?: number;
151
147
152
148
/**
153
-
* Callback invoked periodically to report processing progress.
154
-
* Called approximately every 10,000 operations.
149
+
* Path to module exporting a `detect` or default function.
150
+
* Required when threads > 1. The module should export:
151
+
* ```ts
152
+
* export function detect({ op }) { return [...labels]; }
153
+
* ```
154
+
*/
155
+
module?: string;
156
+
157
+
/**
158
+
* Suppress all console output from the detect script (default: false).
159
+
*
160
+
* When enabled, console.log, console.error, etc. from the detect function
161
+
* are silenced. Progress reporting and final statistics are still shown.
162
+
*
163
+
* @example
164
+
* ```ts
165
+
* await bundle.processBundles(1, 10, {
166
+
* module: './noisy-detect.ts',
167
+
* silent: true // Suppress debug output
168
+
* });
169
+
* ```
170
+
*/
171
+
silent?: boolean;
172
+
173
+
/**
174
+
* Output matches immediately without buffering or sorting (default: false).
175
+
*
176
+
* When enabled, matches are output as soon as they're found rather than
177
+
* being collected, sorted, and output at the end. Useful for:
178
+
* - Real-time streaming output
179
+
* - Reducing memory usage with large result sets
180
+
* - Early access to results
181
+
*
182
+
* Note: With flush enabled, matches may be out of chronological order
183
+
* when using multiple threads.
184
+
*
185
+
* @example
186
+
* ```ts
187
+
* await bundle.processBundles(1, 100, {
188
+
* module: './detect.ts',
189
+
* threads: 4,
190
+
* flush: true,
191
+
* onMatch: (match) => {
192
+
* console.log(`Found: ${match.bundle}/${match.position}`);
193
+
* }
194
+
* });
195
+
* ```
196
+
*/
197
+
flush?: boolean;
198
+
199
+
/**
200
+
* Progress callback invoked periodically during processing.
201
+
*
202
+
* Called approximately every 10,000 operations to report current
203
+
* processing statistics.
155
204
*
156
205
* @param stats - Current processing statistics
206
+
*
207
+
* @example
208
+
* ```ts
209
+
* await bundle.processBundles(1, 100, callback, {
210
+
* onProgress: (stats) => {
211
+
* console.log(`${stats.totalOps} operations processed`);
212
+
* }
213
+
* });
214
+
* ```
157
215
*/
158
216
onProgress?: (stats: ProcessStats) => void;
217
+
218
+
/**
219
+
* Match callback invoked for each match when flush mode is enabled.
220
+
*
221
+
* Only called when `flush: true`. Receives match objects as they're
222
+
* found, without waiting for sorting. Ignored in non-flush mode.
223
+
*
224
+
* @param match - The match object with bundle, position, cid, size, and labels
225
+
*
226
+
* @example
227
+
* ```ts
228
+
* await bundle.processBundles(1, 100, {
229
+
* module: './detect.ts',
230
+
* flush: true,
231
+
* onMatch: (match) => {
232
+
* // Output immediately
233
+
* console.log(`${match.bundle},${match.position},${match.labels}`);
234
+
* }
235
+
* });
236
+
* ```
237
+
*/
238
+
onMatch?: (match: any) => void;
159
239
}
160
240
161
241
/**
+77
-46
src/worker.ts
+77
-46
src/worker.ts
···
1
1
/// <reference lib="webworker" />
2
2
3
-
import { PLCBundle } from './plcbundle';
4
-
import type { Operation } from './types';
5
-
6
3
export interface WorkerTask {
7
4
dir: string;
8
5
start: number;
9
6
end: number;
10
-
detectPath: string;
7
+
modulePath: string;
8
+
silent?: boolean;
9
+
flush?: boolean;
11
10
}
12
11
13
12
export interface WorkerProgress {
14
13
type: 'progress';
15
14
totalOps: number;
16
-
matchCount: number;
17
15
totalBytes: number;
18
-
matchedBytes: number;
16
+
}
17
+
18
+
export interface WorkerMatch {
19
+
type: 'match';
20
+
bundle: number;
21
+
position: number;
22
+
cid: string;
23
+
size: number;
24
+
labels: string[];
19
25
}
20
26
21
27
export interface WorkerResult {
22
28
type: 'result';
23
29
totalOps: number;
24
-
matchCount: number;
25
30
totalBytes: number;
26
-
matchedBytes: number;
27
31
matches: Array<{
28
32
bundle: number;
29
33
position: number;
···
33
37
}>;
34
38
}
35
39
36
-
// Worker message handler
37
40
self.onmessage = async (event: MessageEvent<WorkerTask>) => {
38
-
const { dir, start, end, detectPath } = event.data;
41
+
const { dir, start, end, modulePath, silent, flush } = event.data;
39
42
40
-
const bundle = new PLCBundle(dir);
43
+
// Override console if silent
44
+
if (silent) {
45
+
globalThis.console = {
46
+
log: () => {},
47
+
error: () => {},
48
+
warn: () => {},
49
+
info: () => {},
50
+
debug: () => {},
51
+
trace: () => {},
52
+
} as any;
53
+
}
41
54
42
55
// Load detect function
43
-
const mod = await import(detectPath);
44
-
const detect = mod.detect || mod.default;
56
+
const mod = await import(modulePath);
57
+
const detectFn = mod.detect || mod.default;
45
58
46
59
let totalOps = 0;
47
-
let matchCount = 0;
48
60
let totalBytes = 0;
49
-
let matchedBytes = 0;
50
61
const matches: any[] = [];
51
62
52
-
await bundle.processBundles(start, end, (op: Operation, position: number, bundleNum: number) => {
53
-
totalOps++;
54
-
const opSize = JSON.stringify(op).length;
55
-
totalBytes += opSize;
63
+
for (let bundleNum = start; bundleNum <= end; bundleNum++) {
64
+
const bundlePath = `${dir}${bundleNum.toString().padStart(6, '0')}.jsonl.zst`;
56
65
57
-
const labels = detect({ op });
58
-
59
-
if (labels && labels.length > 0) {
60
-
matchCount++;
61
-
matchedBytes += opSize;
66
+
try {
67
+
const compressed = await Bun.file(bundlePath).arrayBuffer();
68
+
const decompressed = Bun.zstdDecompressSync(compressed);
69
+
const text = new TextDecoder().decode(decompressed);
70
+
71
+
const lines = text.split('\n');
62
72
63
-
matches.push({
64
-
bundle: bundleNum,
65
-
position,
66
-
cid: op.cid.slice(-4),
67
-
size: opSize,
68
-
labels,
69
-
});
70
-
}
71
-
72
-
// Send progress every 10000 operations
73
-
if (totalOps % 10000 === 0) {
74
-
self.postMessage({
75
-
type: 'progress',
76
-
totalOps,
77
-
matchCount,
78
-
totalBytes,
79
-
matchedBytes,
80
-
} as WorkerProgress);
73
+
for (let position = 0; position < lines.length; position++) {
74
+
const line = lines[position];
75
+
if (!line.trim()) continue;
76
+
77
+
totalOps++;
78
+
totalBytes += line.length;
79
+
80
+
const op = JSON.parse(line);
81
+
const labels = detectFn({ op });
82
+
83
+
if (labels && labels.length > 0) {
84
+
const match = {
85
+
bundle: bundleNum,
86
+
position,
87
+
cid: op.cid.slice(-4),
88
+
size: line.length,
89
+
labels,
90
+
};
91
+
92
+
if (flush) {
93
+
// Send match immediately
94
+
self.postMessage({
95
+
type: 'match',
96
+
...match,
97
+
} as WorkerMatch);
98
+
} else {
99
+
// Buffer matches
100
+
matches.push(match);
101
+
}
102
+
}
103
+
104
+
if (totalOps % 10000 === 0) {
105
+
self.postMessage({
106
+
type: 'progress',
107
+
totalOps,
108
+
totalBytes,
109
+
} as WorkerProgress);
110
+
}
111
+
}
112
+
} catch (error) {
113
+
// Continue on error
81
114
}
82
-
});
115
+
}
83
116
84
117
// Send final result
85
118
self.postMessage({
86
119
type: 'result',
87
120
totalOps,
88
-
matchCount,
89
121
totalBytes,
90
-
matchedBytes,
91
-
matches,
122
+
matches: flush ? [] : matches,
92
123
} as WorkerResult);
93
124
};
+78
tests/commands.test.ts
+78
tests/commands.test.ts
···
1
+
import { describe, test, expect, beforeEach } from 'bun:test';
2
+
import { PLCBundle } from '../src/plcbundle';
3
+
import { TEMP_DIR, createMockIndex, createMockOperations } from './setup';
4
+
import { resolve } from 'path';
5
+
6
+
describe('CLI Commands', () => {
7
+
let detectModulePath: string;
8
+
let processModulePath: string;
9
+
10
+
beforeEach(async () => {
11
+
const bundle = new PLCBundle(TEMP_DIR);
12
+
const mockIndex = createMockIndex();
13
+
await bundle.saveIndex(mockIndex);
14
+
15
+
// Create test bundles
16
+
for (let i = 1; i <= 3; i++) {
17
+
const operations = createMockOperations(100);
18
+
const jsonl = operations.map(op => JSON.stringify(op)).join('\n') + '\n';
19
+
const compressed = Bun.zstdCompressSync(new TextEncoder().encode(jsonl));
20
+
await Bun.write(bundle.getBundlePath(i), compressed);
21
+
}
22
+
23
+
// Create test modules with Bun.resolveSync
24
+
detectModulePath = Bun.resolveSync(`${TEMP_DIR}/test-detect.ts`, process.cwd());
25
+
await Bun.write(detectModulePath, `
26
+
export function detect({ op }) {
27
+
return op.did.includes('test') ? ['test'] : [];
28
+
}
29
+
`);
30
+
31
+
processModulePath = Bun.resolveSync(`${TEMP_DIR}/test-process.ts`, process.cwd());
32
+
await Bun.write(processModulePath, `
33
+
let count = 0;
34
+
export function process({ op }) {
35
+
count++;
36
+
}
37
+
`);
38
+
});
39
+
40
+
describe('detect command', () => {
41
+
test('module can be imported and has detect function', async () => {
42
+
const mod = await import(detectModulePath);
43
+
expect(mod.detect).toBeDefined();
44
+
expect(typeof mod.detect).toBe('function');
45
+
46
+
const result = mod.detect({ op: { did: 'test123' } });
47
+
expect(Array.isArray(result)).toBe(true);
48
+
});
49
+
});
50
+
51
+
describe('process command', () => {
52
+
test('module can be imported and has process function', async () => {
53
+
const mod = await import(processModulePath);
54
+
expect(mod.process).toBeDefined();
55
+
expect(typeof mod.process).toBe('function');
56
+
});
57
+
});
58
+
59
+
describe('module loading', () => {
60
+
test('detect module returns labels array', async () => {
61
+
const mod = await import(detectModulePath);
62
+
63
+
const result1 = mod.detect({ op: { did: 'did:plc:test123' } });
64
+
expect(result1).toContain('test');
65
+
66
+
const result2 = mod.detect({ op: { did: 'did:plc:abc' } });
67
+
expect(result2).toEqual([]);
68
+
});
69
+
70
+
test('process module executes without errors', async () => {
71
+
const mod = await import(processModulePath);
72
+
73
+
// Should not throw
74
+
mod.process({ op: { did: 'test' }, position: 0, bundle: 1, line: '{}' });
75
+
expect(true).toBe(true);
76
+
});
77
+
});
78
+
});
+78
tests/common.test.ts
+78
tests/common.test.ts
···
1
+
import { describe, test, expect, beforeEach } from 'bun:test';
2
+
import { parseProcessArgs, parseBundleSelection } from '../src/cmds/common';
3
+
import { PLCBundle } from '../src/plcbundle';
4
+
import { TEMP_DIR, createMockIndex } from './setup';
5
+
6
+
describe('Common CLI Logic', () => {
7
+
describe('parseProcessArgs', () => {
8
+
test('parses positional arguments', () => {
9
+
const { values, positionals } = parseProcessArgs(['./detect.ts', '--threads', '4']);
10
+
11
+
expect(positionals[0]).toBe('./detect.ts');
12
+
expect(values.threads).toBe('4');
13
+
});
14
+
15
+
test('parses boolean flags', () => {
16
+
const { values } = parseProcessArgs(['./detect.ts', '--silent', '--flush']);
17
+
18
+
expect(values.silent).toBe(true);
19
+
expect(values.flush).toBe(true);
20
+
});
21
+
22
+
test('handles short flags', () => {
23
+
const { values } = parseProcessArgs(['./detect.ts', '-s']);
24
+
25
+
expect(values.s).toBe(true);
26
+
});
27
+
});
28
+
29
+
describe('parseBundleSelection', () => {
30
+
let bundle: PLCBundle;
31
+
32
+
beforeEach(async () => {
33
+
bundle = new PLCBundle(TEMP_DIR);
34
+
const mockIndex = createMockIndex();
35
+
await bundle.saveIndex(mockIndex);
36
+
});
37
+
38
+
test('defaults to all bundles', async () => {
39
+
const { start, end } = await parseBundleSelection({}, bundle);
40
+
41
+
expect(start).toBe(1);
42
+
expect(end).toBe(3); // From mock index
43
+
});
44
+
45
+
test('parses single bundle', async () => {
46
+
const { start, end } = await parseBundleSelection({ bundles: '2' }, bundle);
47
+
48
+
expect(start).toBe(2);
49
+
expect(end).toBe(2);
50
+
});
51
+
52
+
test('parses bundle range', async () => {
53
+
const { start, end } = await parseBundleSelection({ bundles: '1-3' }, bundle);
54
+
55
+
expect(start).toBe(1);
56
+
expect(end).toBe(3);
57
+
});
58
+
59
+
test('throws error for invalid range', async () => {
60
+
try {
61
+
await parseBundleSelection({ bundles: '1-999' }, bundle);
62
+
expect(true).toBe(false); // Should throw
63
+
} catch (error) {
64
+
expect(error).toBeDefined();
65
+
expect((error as Error).message).toContain('Invalid bundle range');
66
+
}
67
+
});
68
+
69
+
test('throws error for invalid format', async () => {
70
+
try {
71
+
await parseBundleSelection({ bundles: 'invalid' }, bundle);
72
+
expect(true).toBe(false); // Should throw
73
+
} catch (error) {
74
+
expect(error).toBeDefined();
75
+
}
76
+
});
77
+
});
78
+
});
+101
tests/multithread.test.ts
+101
tests/multithread.test.ts
···
1
+
import { describe, test, expect, beforeEach } from 'bun:test';
2
+
import { PLCBundle } from '../src/plcbundle';
3
+
import { TEMP_DIR, createMockIndex, createMockOperations } from './setup';
4
+
5
+
describe('Multi-threaded Processing', () => {
6
+
let bundle: PLCBundle;
7
+
let modulePath: string;
8
+
9
+
beforeEach(async () => {
10
+
bundle = new PLCBundle(TEMP_DIR);
11
+
12
+
const mockIndex = createMockIndex();
13
+
await bundle.saveIndex(mockIndex);
14
+
15
+
// Create test bundles
16
+
for (let i = 1; i <= 5; i++) {
17
+
const operations = createMockOperations(100);
18
+
const jsonl = operations.map(op => JSON.stringify(op)).join('\n') + '\n';
19
+
const compressed = Bun.zstdCompressSync(new TextEncoder().encode(jsonl));
20
+
await Bun.write(bundle.getBundlePath(i), compressed);
21
+
}
22
+
23
+
// Create test module with Bun.resolveSync
24
+
modulePath = Bun.resolveSync(`${TEMP_DIR}/test-module.ts`, process.cwd());
25
+
await Bun.write(modulePath, `
26
+
export function detect({ op }) {
27
+
return op.did.length > 10 ? ['long-did'] : [];
28
+
}
29
+
`);
30
+
});
31
+
32
+
test('module loads correctly', async () => {
33
+
const mod = await import(modulePath);
34
+
expect(mod.detect).toBeDefined();
35
+
});
36
+
37
+
test('single-threaded processing works', async () => {
38
+
const stats = await bundle.processBundles(1, 2, {
39
+
module: modulePath,
40
+
threads: 1,
41
+
});
42
+
43
+
expect(stats.totalOps).toBe(200); // 2 bundles * 100 ops
44
+
expect(stats.totalBytes).toBeGreaterThan(0);
45
+
});
46
+
47
+
test('multi-threaded processing with 2 threads', async () => {
48
+
const stats = await bundle.processBundles(1, 4, {
49
+
module: modulePath,
50
+
threads: 2,
51
+
});
52
+
53
+
expect(stats.totalOps).toBe(400); // 4 bundles * 100 ops
54
+
expect(stats.totalBytes).toBeGreaterThan(0);
55
+
});
56
+
57
+
test('multi-threaded produces same op count as single-threaded', async () => {
58
+
const stats1 = await bundle.processBundles(1, 3, {
59
+
module: modulePath,
60
+
threads: 1,
61
+
});
62
+
63
+
const stats2 = await bundle.processBundles(1, 3, {
64
+
module: modulePath,
65
+
threads: 2,
66
+
});
67
+
68
+
expect(stats1.totalOps).toBe(stats2.totalOps);
69
+
expect(stats1.totalBytes).toBe(stats2.totalBytes);
70
+
});
71
+
72
+
test('progress callback works with multi-threading', async () => {
73
+
let progressCalls = 0;
74
+
75
+
await bundle.processBundles(1, 5, {
76
+
module: modulePath,
77
+
threads: 2,
78
+
onProgress: () => {
79
+
progressCalls++;
80
+
},
81
+
});
82
+
83
+
// May or may not be called depending on threshold
84
+
expect(typeof progressCalls).toBe('number');
85
+
});
86
+
87
+
test('validates threads parameter', async () => {
88
+
let errorThrown = false;
89
+
90
+
try {
91
+
await bundle.processBundles(1, 1, () => {}, {
92
+
threads: 4, // Without module - should throw
93
+
});
94
+
} catch (error) {
95
+
errorThrown = true;
96
+
expect((error as Error).message).toContain('module');
97
+
}
98
+
99
+
expect(errorThrown).toBe(true);
100
+
});
101
+
});
+223
-40
tests/processing.test.ts
+223
-40
tests/processing.test.ts
···
51
51
});
52
52
});
53
53
54
+
describe('streamOperations', () => {
55
+
test('streams operations from bundle', async () => {
56
+
const operations = [];
57
+
58
+
for await (const { op, line } of bundle.streamOperations(1)) {
59
+
operations.push(op);
60
+
expect(line).toBeDefined();
61
+
expect(line.length).toBeGreaterThan(0);
62
+
}
63
+
64
+
expect(operations.length).toBe(100);
65
+
expect(operations[0].did).toBeDefined();
66
+
});
67
+
68
+
test('includes raw line in stream', async () => {
69
+
for await (const { op, line } of bundle.streamOperations(1)) {
70
+
expect(typeof line).toBe('string');
71
+
expect(line.trim().length).toBeGreaterThan(0);
72
+
73
+
// Line should be valid JSON
74
+
const parsed = JSON.parse(line);
75
+
expect(parsed.did).toBe(op.did);
76
+
break; // Just check first
77
+
}
78
+
});
79
+
});
80
+
54
81
describe('processBundles', () => {
55
-
test('calls callback for each operation', async () => {
82
+
test('processes with callback function', async () => {
56
83
const callback = mock(() => {});
57
84
58
-
await bundle.processBundles(1, 1, callback, { threads: 1 });
85
+
const stats = await bundle.processBundles(1, 1, callback);
59
86
60
-
// Should have been called for each operation (100 in our mock)
61
87
expect(callback).toHaveBeenCalled();
62
88
expect(callback.mock.calls.length).toBe(100);
89
+
expect(stats.totalOps).toBe(100);
63
90
});
64
91
65
-
test('tracks statistics', async () => {
92
+
test('callback receives all parameters', async () => {
93
+
const callback = mock((op: any, position: number, bundleNum: number, line: string) => {
94
+
expect(op).toBeDefined();
95
+
expect(typeof position).toBe('number');
96
+
expect(typeof bundleNum).toBe('number');
97
+
expect(typeof line).toBe('string');
98
+
});
99
+
100
+
await bundle.processBundles(1, 1, callback);
101
+
102
+
expect(callback).toHaveBeenCalled();
103
+
});
104
+
105
+
test('tracks statistics accurately', async () => {
66
106
const callback = mock(() => {});
67
107
68
-
const stats = await bundle.processBundles(1, 1, callback, {
69
-
threads: 1,
70
-
});
108
+
const stats = await bundle.processBundles(1, 1, callback);
71
109
72
-
expect(stats).toBeDefined();
73
-
expect(stats.totalOps).toBe(100); // Our mock has 100 operations
110
+
expect(stats.totalOps).toBe(100);
74
111
expect(stats.totalBytes).toBeGreaterThan(0);
112
+
expect(stats.matchCount).toBe(0);
113
+
expect(stats.matchedBytes).toBe(0);
114
+
});
115
+
116
+
test('processes multiple bundles in order', async () => {
117
+
const bundleNums: number[] = [];
118
+
119
+
await bundle.processBundles(1, 3, (op, position, bundleNum) => {
120
+
bundleNums.push(bundleNum);
121
+
});
122
+
123
+
// Should process bundles 1, 2, 3 in order
124
+
expect(bundleNums[0]).toBe(1);
125
+
expect(bundleNums[99]).toBe(1); // Last of bundle 1
126
+
expect(bundleNums[100]).toBe(2); // First of bundle 2
127
+
expect(bundleNums[299]).toBe(3); // Last of bundle 3
75
128
});
76
129
77
130
test('supports progress callback', async () => {
78
131
const progressCallback = mock(() => {});
79
-
const processCallback = mock(() => {});
80
132
81
-
await bundle.processBundles(1, 1, processCallback, {
133
+
await bundle.processBundles(1, 1, () => {}, {
82
134
onProgress: progressCallback,
83
135
});
84
136
85
-
// Callback was called
86
-
expect(processCallback).toHaveBeenCalled();
137
+
// Progress should not be called for only 100 operations (threshold is 10,000)
138
+
expect(progressCallback.mock.calls.length).toBe(0);
87
139
});
88
140
89
-
test('respects thread option', async () => {
90
-
const callback = mock(() => {});
141
+
test('calls progress callback at threshold', async () => {
142
+
// Create larger bundle to trigger progress
143
+
const largeOps = createMockOperations(15000);
144
+
const jsonl = largeOps.map(op => JSON.stringify(op)).join('\n') + '\n';
145
+
const compressed = Bun.zstdCompressSync(new TextEncoder().encode(jsonl));
146
+
await Bun.write(bundle.getBundlePath(10), compressed);
91
147
92
-
const stats1 = await bundle.processBundles(1, 1, callback, { threads: 1 });
148
+
const progressCallback = mock(() => {});
93
149
94
-
const callback2 = mock(() => {});
95
-
const stats4 = await bundle.processBundles(1, 1, callback2, { threads: 4 });
150
+
await bundle.processBundles(10, 10, () => {}, {
151
+
onProgress: progressCallback,
152
+
});
96
153
97
-
// Both should work and process same number of operations
98
-
expect(stats1.totalOps).toBe(100);
99
-
expect(stats4.totalOps).toBe(100);
154
+
// Should be called at least once (at 10,000 ops)
155
+
expect(progressCallback.mock.calls.length).toBeGreaterThan(0);
100
156
});
101
157
102
-
test('processes multiple bundles', async () => {
103
-
const callback = mock(() => {});
158
+
test('line length matches original JSONL', async () => {
159
+
const sizes: number[] = [];
160
+
161
+
await bundle.processBundles(1, 1, (op, position, bundleNum, line) => {
162
+
sizes.push(line.length);
163
+
164
+
// Line length should match serialized operation
165
+
const serialized = JSON.stringify(op);
166
+
expect(line.length).toBeGreaterThanOrEqual(serialized.length - 10); // Allow small variance
167
+
});
168
+
169
+
expect(sizes.length).toBe(100);
170
+
expect(sizes.every(s => s > 0)).toBe(true);
171
+
});
172
+
173
+
test('supports async callbacks', async () => {
174
+
const callback = mock(async (op: any) => {
175
+
await new Promise(resolve => setTimeout(resolve, 1));
176
+
});
104
177
105
-
const stats = await bundle.processBundles(1, 3, callback, { threads: 1 });
178
+
const stats = await bundle.processBundles(1, 1, callback);
179
+
180
+
expect(callback).toHaveBeenCalled();
181
+
expect(stats.totalOps).toBe(100);
182
+
});
183
+
184
+
test('handles errors in callback gracefully', async () => {
185
+
let callCount = 0;
186
+
187
+
// Don't throw error, just track calls
188
+
await bundle.processBundles(1, 1, () => {
189
+
callCount++;
190
+
// Don't throw - just count
191
+
});
106
192
107
-
// Should process all 3 bundles (300 operations total)
108
-
expect(stats.totalOps).toBe(300);
109
-
expect(callback.mock.calls.length).toBe(300);
193
+
expect(callCount).toBe(100);
110
194
});
111
195
});
112
196
113
-
describe('streamOperations', () => {
114
-
test('streams operations from bundle', async () => {
115
-
const operations = [];
197
+
describe('processBundles with module path', () => {
198
+
test('loads module and calls function', async () => {
199
+
// Create a test module
200
+
const testModulePath = Bun.resolveSync(`${TEMP_DIR}/test-module.ts`, process.cwd());
201
+
await Bun.write(testModulePath, `
202
+
export function detect({ op }) {
203
+
return op.did.startsWith('did:plc:') ? ['test'] : [];
204
+
}
205
+
`);
206
+
207
+
const stats = await bundle.processBundles(1, 1, {
208
+
module: testModulePath,
209
+
});
116
210
117
-
for await (const op of bundle.streamOperations(1)) {
118
-
operations.push(op);
211
+
expect(stats.totalOps).toBe(100);
212
+
});
213
+
214
+
test('supports silent mode', async () => {
215
+
// Create absolute path directly (file doesn't exist yet to resolve)
216
+
const testModulePath = `${process.cwd()}/${TEMP_DIR}/noisy-module.ts`;
217
+
await Bun.write(testModulePath, `
218
+
export function detect({ op }) {
219
+
console.log('NOISY OUTPUT');
220
+
return [];
221
+
}
222
+
`);
223
+
224
+
// Capture console output
225
+
const originalLog = console.log;
226
+
const originalError = console.error;
227
+
let logOutput = '';
228
+
229
+
console.log = (...args: any[]) => {
230
+
logOutput += args.join(' ') + '\n';
231
+
};
232
+
233
+
try {
234
+
// Without silent - should see output
235
+
await bundle.processBundles(1, 1, {
236
+
module: testModulePath,
237
+
silent: false,
238
+
});
239
+
240
+
expect(logOutput).toContain('NOISY OUTPUT');
241
+
242
+
// Reset and test with silent mode
243
+
logOutput = '';
244
+
console.log = () => {};
245
+
console.error = () => {};
246
+
247
+
await bundle.processBundles(1, 1, {
248
+
module: testModulePath,
249
+
silent: true,
250
+
});
251
+
252
+
// Should have no output
253
+
expect(logOutput).toBe('');
254
+
} finally {
255
+
console.log = originalLog;
256
+
console.error = originalError;
119
257
}
258
+
});
259
+
260
+
test('loads module and calls function', async () => {
261
+
// Create absolute path
262
+
const testModulePath = `${process.cwd()}/${TEMP_DIR}/test-module.ts`;
263
+
await Bun.write(testModulePath, `
264
+
export function detect({ op }) {
265
+
return op.did.startsWith('did:plc:') ? ['test'] : [];
266
+
}
267
+
`);
120
268
121
-
expect(operations.length).toBe(100);
122
-
expect(operations[0].did).toBeDefined();
269
+
const stats = await bundle.processBundles(1, 1, {
270
+
module: testModulePath,
271
+
});
272
+
273
+
expect(stats.totalOps).toBe(100);
123
274
});
124
275
125
-
test('operations have required fields', async () => {
126
-
for await (const op of bundle.streamOperations(1)) {
127
-
expect(op.did).toBeDefined();
128
-
expect(op.cid).toBeDefined();
129
-
expect(op.createdAt).toBeDefined();
130
-
break; // Just check first one
276
+
test('throws error for multi-threading without module', async () => {
277
+
let errorThrown = false;
278
+
let errorMessage = '';
279
+
280
+
try {
281
+
await bundle.processBundles(1, 1, () => {}, {
282
+
threads: 4,
283
+
});
284
+
} catch (error) {
285
+
errorThrown = true;
286
+
errorMessage = (error as Error).message;
131
287
}
288
+
289
+
expect(errorThrown).toBe(true);
290
+
expect(errorMessage).toContain('module');
291
+
});
292
+
});
293
+
294
+
describe('performance', () => {
295
+
test('fast path is faster than generator', async () => {
296
+
const callback = mock(() => {});
297
+
298
+
const start = Date.now();
299
+
await bundle.processBundles(1, 3, callback);
300
+
const duration = Date.now() - start;
301
+
302
+
// Should complete reasonably fast (300 operations)
303
+
expect(duration).toBeLessThan(1000); // Less than 1 second
304
+
expect(callback.mock.calls.length).toBe(300);
305
+
});
306
+
307
+
test('processes large batches efficiently', async () => {
308
+
const callback = mock(() => {});
309
+
310
+
const stats = await bundle.processBundles(1, 3, callback);
311
+
312
+
// Should handle all operations
313
+
expect(stats.totalOps).toBe(300);
314
+
expect(stats.totalBytes).toBeGreaterThan(0);
132
315
});
133
316
});
134
317
});