+2
-1
.gitignore
+2
-1
.gitignore
+12
examples/flush-pds.ts
+12
examples/flush-pds.ts
···
···
1
+
// flushes all pds endpoints
2
+
3
+
const unique: Array<string> = []
4
+
5
+
export function process({ op }: { op: any }) {
6
+
7
+
const endpoint = op.operation.services?.atproto_pds?.endpoint
8
+
if (!unique.includes(endpoint)) {
9
+
console.log(endpoint)
10
+
unique.push(endpoint)
11
+
}
12
+
}
+1
-1
src/cli.ts
+1
-1
src/cli.ts
+5
-3
src/cmds/common.ts
+5
-3
src/cmds/common.ts
···
11
silent: boolean;
12
flush?: boolean;
13
mode: 'detect' | 'process';
14
onMatch?: (match: any, matchCount: number, matchedBytes: number) => void;
15
}
16
···
18
* Common processing logic for both detect and process commands
19
*/
20
export async function processOperations(options: ProcessingOptions) {
21
-
const { dir, start, end, modulePath, threads, silent, flush, mode, onMatch } = options;
22
23
const bundle = new PLCBundle(dir);
24
···
58
threads,
59
silent,
60
flush,
61
-
onProgress: (progressStats: ProcessStats) => {
62
const elapsed = (Date.now() - startTime) / 1000;
63
const opsPerSec = (progressStats.totalOps / elapsed).toFixed(0);
64
const mbPerSec = (progressStats.totalBytes / elapsed / 1e6).toFixed(1);
···
129
}
130
},
131
{
132
-
onProgress: (progressStats: ProcessStats) => {
133
const elapsed = (Date.now() - startTime) / 1000;
134
const opsPerSec = (progressStats.totalOps / elapsed).toFixed(0);
135
const mbPerSec = (progressStats.totalBytes / elapsed / 1e6).toFixed(1);
···
182
silent: { type: 'boolean', default: false },
183
s: { type: 'boolean', default: false },
184
flush: { type: 'boolean', default: false },
185
},
186
strict: false,
187
allowPositionals: true,
···
11
silent: boolean;
12
flush?: boolean;
13
mode: 'detect' | 'process';
14
+
noProgress?: boolean;
15
onMatch?: (match: any, matchCount: number, matchedBytes: number) => void;
16
}
17
···
19
* Common processing logic for both detect and process commands
20
*/
21
export async function processOperations(options: ProcessingOptions) {
22
+
const { dir, start, end, modulePath, threads, silent, flush, mode, noProgress, onMatch } = options;
23
24
const bundle = new PLCBundle(dir);
25
···
59
threads,
60
silent,
61
flush,
62
+
onProgress: noProgress ? undefined : (progressStats: ProcessStats) => { // Check noProgress
63
const elapsed = (Date.now() - startTime) / 1000;
64
const opsPerSec = (progressStats.totalOps / elapsed).toFixed(0);
65
const mbPerSec = (progressStats.totalBytes / elapsed / 1e6).toFixed(1);
···
130
}
131
},
132
{
133
+
onProgress: noProgress ? undefined : (progressStats: ProcessStats) => { // Check noProgress
134
const elapsed = (Date.now() - startTime) / 1000;
135
const opsPerSec = (progressStats.totalOps / elapsed).toFixed(0);
136
const mbPerSec = (progressStats.totalBytes / elapsed / 1e6).toFixed(1);
···
183
silent: { type: 'boolean', default: false },
184
s: { type: 'boolean', default: false },
185
flush: { type: 'boolean', default: false },
186
+
'no-progress': { type: 'boolean', default: false }, // Add this
187
},
188
strict: false,
189
allowPositionals: true,
+4
-1
src/cmds/detect.ts
+4
-1
src/cmds/detect.ts
···
18
--threads <num> Number of worker threads (default: 1)
19
--flush Output matches immediately (unsorted)
20
-s, --silent Suppress all console output from detect script
21
22
EXAMPLES:
23
plcbundle-bun detect ./detect.ts
24
plcbundle-bun detect ./detect.ts --bundles 1-50 --threads 4
25
-
plcbundle-bun detect ./detect.ts --flush --silent
26
`);
27
return;
28
}
···
40
const threads = parseInt((values.threads as string) || '1');
41
const silent = Boolean(values.silent || values.s);
42
const flush = Boolean(values.flush);
43
44
const bundle = new PLCBundle(dir);
45
const { start, end } = await parseBundleSelection(values, bundle);
···
53
threads,
54
silent,
55
flush,
56
mode: 'detect',
57
onMatch: (match) => {
58
console.log(`${match.bundle},${match.position},${match.cid},${match.size},0.95,${match.labels.join(';')}`);
···
18
--threads <num> Number of worker threads (default: 1)
19
--flush Output matches immediately (unsorted)
20
-s, --silent Suppress all console output from detect script
21
+
--no-progress Disable progress output (default: false)
22
23
EXAMPLES:
24
plcbundle-bun detect ./detect.ts
25
plcbundle-bun detect ./detect.ts --bundles 1-50 --threads 4
26
+
plcbundle-bun detect ./detect.ts --flush --silent --no-progress
27
`);
28
return;
29
}
···
41
const threads = parseInt((values.threads as string) || '1');
42
const silent = Boolean(values.silent || values.s);
43
const flush = Boolean(values.flush);
44
+
const noProgress = Boolean(values['no-progress']); // Add this
45
46
const bundle = new PLCBundle(dir);
47
const { start, end } = await parseBundleSelection(values, bundle);
···
55
threads,
56
silent,
57
flush,
58
+
noProgress, // Pass it
59
mode: 'detect',
60
onMatch: (match) => {
61
console.log(`${match.bundle},${match.position},${match.cid},${match.size},0.95,${match.labels.join(';')}`);
+6
-1
src/cmds/process.ts
+6
-1
src/cmds/process.ts
···
1
import { parseProcessArgs, parseBundleSelection, processOperations } from './common';
2
import { PLCBundle } from '../plcbundle';
3
4
-
export async function process(args: string[]) {
5
if (args.includes('-h') || args.includes('--help')) {
6
console.log(`
7
process - Process operations with a custom function
···
17
--bundles <spec> Bundle selection: number (42) or range (1-50)
18
--threads <num> Number of worker threads (default: 1)
19
-s, --silent Suppress all console output from process script
20
21
EXAMPLES:
22
plcbundle-bun process ./my-processor.ts
23
plcbundle-bun process ./my-processor.ts --bundles 1-50 --threads 4
24
25
PROCESS FUNCTION:
26
export function process({ op, position, bundle, line }) {
···
42
const dir = (values.dir as string) || './';
43
const threads = parseInt((values.threads as string) || '1');
44
const silent = Boolean(values.silent || values.s);
45
46
const bundle = new PLCBundle(dir);
47
const { start, end } = await parseBundleSelection(values, bundle);
···
54
modulePath: resolvedPath,
55
threads,
56
silent,
57
mode: 'process',
58
});
59
}
···
1
+
import { exit } from 'process';
2
import { parseProcessArgs, parseBundleSelection, processOperations } from './common';
3
import { PLCBundle } from '../plcbundle';
4
5
+
export async function processCmd(args: string[]) {
6
if (args.includes('-h') || args.includes('--help')) {
7
console.log(`
8
process - Process operations with a custom function
···
18
--bundles <spec> Bundle selection: number (42) or range (1-50)
19
--threads <num> Number of worker threads (default: 1)
20
-s, --silent Suppress all console output from process script
21
+
--no-progress Disable progress output (default: false)
22
23
EXAMPLES:
24
plcbundle-bun process ./my-processor.ts
25
plcbundle-bun process ./my-processor.ts --bundles 1-50 --threads 4
26
+
plcbundle-bun process ./my-processor.ts --no-progress
27
28
PROCESS FUNCTION:
29
export function process({ op, position, bundle, line }) {
···
45
const dir = (values.dir as string) || './';
46
const threads = parseInt((values.threads as string) || '1');
47
const silent = Boolean(values.silent || values.s);
48
+
const noProgress = Boolean(values['no-progress']); // Add this
49
50
const bundle = new PLCBundle(dir);
51
const { start, end } = await parseBundleSelection(values, bundle);
···
58
modulePath: resolvedPath,
59
threads,
60
silent,
61
+
noProgress, // Pass it
62
mode: 'process',
63
});
64
}
+39
-7
src/plcbundle.ts
+39
-7
src/plcbundle.ts
···
307
throw new Error('Multi-threading requires module path. Use: processBundles(start, end, { module: "./detect.ts", threads: 4 })');
308
}
309
310
// Use workers for multi-threading with module
311
if (threads > 1 && module) {
312
-
return await this.processBundlesWorkers(start, end, module, threads, silent, flush, onProgress, onMatch);
313
}
314
315
// Load module if provided but single-threaded
316
if (module && !callback) {
317
-
const resolvedPath = Bun.resolveSync(module, process.cwd());
318
const mod = await import(resolvedPath);
319
-
const detectFn = mod.detect || mod.default;
320
321
-
callback = (op) => {
322
-
detectFn({ op });
323
};
324
}
325
···
341
threads: number,
342
silent: boolean,
343
flush: boolean,
344
onProgress?: (stats: ProcessStats) => void,
345
onMatch?: (match: any) => void
346
): Promise<ProcessStats & { matches?: any[] }> {
···
406
modulePath,
407
silent,
408
flush,
409
});
410
}
411
···
428
}
429
430
// Sort matches if not flushed
431
-
if (!flush) {
432
allMatches.sort((a, b) => {
433
if (a.bundle !== b.bundle) return a.bundle - b.bundle;
434
return a.position - b.position;
···
440
matchCount: 0,
441
totalBytes,
442
matchedBytes: 0,
443
-
matches: flush ? undefined : allMatches,
444
};
445
}
446
···
307
throw new Error('Multi-threading requires module path. Use: processBundles(start, end, { module: "./detect.ts", threads: 4 })');
308
}
309
310
+
// Determine mode based on what function is exported
311
+
let mode: 'detect' | 'process' = 'detect';
312
+
if (module) {
313
+
try {
314
+
const mod = await import(module);
315
+
// If module has 'process' function, use process mode
316
+
if (mod.process) {
317
+
mode = 'process';
318
+
} else if (mod.detect) {
319
+
mode = 'detect';
320
+
}
321
+
} catch (e) {
322
+
// Default to detect
323
+
}
324
+
}
325
+
326
// Use workers for multi-threading with module
327
if (threads > 1 && module) {
328
+
return await this.processBundlesWorkers(
329
+
start,
330
+
end,
331
+
module,
332
+
threads,
333
+
silent,
334
+
flush,
335
+
mode, // Pass mode
336
+
onProgress,
337
+
onMatch
338
+
);
339
}
340
341
// Load module if provided but single-threaded
342
if (module && !callback) {
343
+
const resolvedPath = module;
344
const mod = await import(resolvedPath);
345
+
const userFn = mode === 'detect' ? (mod.detect || mod.default) : (mod.process || mod.default);
346
347
+
callback = (op, position, bundleNum, line) => {
348
+
if (mode === 'detect') {
349
+
userFn({ op });
350
+
} else {
351
+
userFn({ op, position, bundle: bundleNum, line });
352
+
}
353
};
354
}
355
···
371
threads: number,
372
silent: boolean,
373
flush: boolean,
374
+
mode: 'detect' | 'process', // Add mode parameter
375
onProgress?: (stats: ProcessStats) => void,
376
onMatch?: (match: any) => void
377
): Promise<ProcessStats & { matches?: any[] }> {
···
437
modulePath,
438
silent,
439
flush,
440
+
mode, // Pass mode to worker
441
});
442
}
443
···
460
}
461
462
// Sort matches if not flushed
463
+
if (!flush && mode === 'detect') {
464
allMatches.sort((a, b) => {
465
if (a.bundle !== b.bundle) return a.bundle - b.bundle;
466
return a.position - b.position;
···
472
matchCount: 0,
473
totalBytes,
474
matchedBytes: 0,
475
+
matches: flush || mode === 'process' ? undefined : allMatches,
476
};
477
}
478
+32
-22
src/worker.ts
+32
-22
src/worker.ts
···
7
modulePath: string;
8
silent?: boolean;
9
flush?: boolean;
10
}
11
12
export interface WorkerProgress {
···
38
}
39
40
self.onmessage = async (event: MessageEvent<WorkerTask>) => {
41
-
const { dir, start, end, modulePath, silent, flush } = event.data;
42
43
// Override console if silent
44
if (silent) {
···
52
} as any;
53
}
54
55
-
// Load detect function
56
const mod = await import(modulePath);
57
-
const detectFn = mod.detect || mod.default;
58
59
let totalOps = 0;
60
let totalBytes = 0;
···
78
totalBytes += line.length;
79
80
const op = JSON.parse(line);
81
-
const labels = detectFn({ op });
82
83
-
if (labels && labels.length > 0) {
84
-
const match = {
85
-
bundle: bundleNum,
86
-
position,
87
-
cid: op.cid.slice(-4),
88
-
size: line.length,
89
-
labels,
90
-
};
91
92
-
if (flush) {
93
-
// Send match immediately
94
-
self.postMessage({
95
-
type: 'match',
96
-
...match,
97
-
} as WorkerMatch);
98
-
} else {
99
-
// Buffer matches
100
-
matches.push(match);
101
}
102
}
103
104
if (totalOps % 10000 === 0) {
···
121
totalBytes,
122
matches: flush ? [] : matches,
123
} as WorkerResult);
124
-
};
···
7
modulePath: string;
8
silent?: boolean;
9
flush?: boolean;
10
+
mode?: 'detect' | 'process'; // Add mode parameter
11
}
12
13
export interface WorkerProgress {
···
39
}
40
41
self.onmessage = async (event: MessageEvent<WorkerTask>) => {
42
+
const { dir, start, end, modulePath, silent, flush, mode = 'detect' } = event.data;
43
44
// Override console if silent
45
if (silent) {
···
53
} as any;
54
}
55
56
+
// Load the appropriate function based on mode
57
const mod = await import(modulePath);
58
+
const userFn = mode === 'detect'
59
+
? (mod.detect || mod.default)
60
+
: (mod.process || mod.default);
61
62
let totalOps = 0;
63
let totalBytes = 0;
···
81
totalBytes += line.length;
82
83
const op = JSON.parse(line);
84
85
+
if (mode === 'detect') {
86
+
// Detection mode - look for labels
87
+
const labels = userFn({ op });
88
89
+
if (labels && labels.length > 0) {
90
+
const match = {
91
+
bundle: bundleNum,
92
+
position,
93
+
cid: op.cid.slice(-4),
94
+
size: line.length,
95
+
labels,
96
+
};
97
+
98
+
if (flush) {
99
+
// Send match immediately
100
+
self.postMessage({
101
+
type: 'match',
102
+
...match,
103
+
} as WorkerMatch);
104
+
} else {
105
+
// Buffer matches
106
+
matches.push(match);
107
+
}
108
}
109
+
} else {
110
+
// Process mode - just call the function
111
+
userFn({ op, position, bundle: bundleNum, line });
112
}
113
114
if (totalOps % 10000 === 0) {
···
131
totalBytes,
132
matches: flush ? [] : matches,
133
} as WorkerResult);
134
+
};
+8
-9
tests/commands.test.ts
+8
-9
tests/commands.test.ts
···
1
import { describe, test, expect, beforeEach } from 'bun:test';
2
import { PLCBundle } from '../src/plcbundle';
3
import { TEMP_DIR, createMockIndex, createMockOperations } from './setup';
4
-
import { resolve } from 'path';
5
6
describe('CLI Commands', () => {
7
let detectModulePath: string;
···
20
await Bun.write(bundle.getBundlePath(i), compressed);
21
}
22
23
-
// Create test modules with Bun.resolveSync
24
-
detectModulePath = Bun.resolveSync(`${TEMP_DIR}/test-detect.ts`, process.cwd());
25
await Bun.write(detectModulePath, `
26
-
export function detect({ op }) {
27
return op.did.includes('test') ? ['test'] : [];
28
-
}
29
`);
30
31
-
processModulePath = Bun.resolveSync(`${TEMP_DIR}/test-process.ts`, process.cwd());
32
await Bun.write(processModulePath, `
33
-
let count = 0;
34
-
export function process({ op }) {
35
count++;
36
-
}
37
`);
38
});
39
···
1
import { describe, test, expect, beforeEach } from 'bun:test';
2
import { PLCBundle } from '../src/plcbundle';
3
import { TEMP_DIR, createMockIndex, createMockOperations } from './setup';
4
5
describe('CLI Commands', () => {
6
let detectModulePath: string;
···
19
await Bun.write(bundle.getBundlePath(i), compressed);
20
}
21
22
+
// Create test modules - use absolute paths
23
+
detectModulePath = `${process.cwd()}/${TEMP_DIR}/test-detect.ts`;
24
await Bun.write(detectModulePath, `
25
+
export function detect({ op }) {
26
return op.did.includes('test') ? ['test'] : [];
27
+
}
28
`);
29
30
+
processModulePath = `${process.cwd()}/${TEMP_DIR}/test-process.ts`;
31
await Bun.write(processModulePath, `
32
+
let count = 0;
33
+
export function process({ op }) {
34
count++;
35
+
}
36
`);
37
});
38
+2
-2
tests/multithread.test.ts
+2
-2
tests/multithread.test.ts
···
20
await Bun.write(bundle.getBundlePath(i), compressed);
21
}
22
23
-
// Create test module with Bun.resolveSync
24
-
modulePath = Bun.resolveSync(`${TEMP_DIR}/test-module.ts`, process.cwd());
25
await Bun.write(modulePath, `
26
export function detect({ op }) {
27
return op.did.length > 10 ? ['long-did'] : [];
+3
-3
tests/processing.test.ts
+3
-3
tests/processing.test.ts
···
196
197
describe('processBundles with module path', () => {
198
test('loads module and calls function', async () => {
199
-
// Create a test module
200
-
const testModulePath = Bun.resolveSync(`${TEMP_DIR}/test-module.ts`, process.cwd());
201
await Bun.write(testModulePath, `
202
export function detect({ op }) {
203
return op.did.startsWith('did:plc:') ? ['test'] : [];
···
212
});
213
214
test('supports silent mode', async () => {
215
-
// Create absolute path directly (file doesn't exist yet to resolve)
216
const testModulePath = `${process.cwd()}/${TEMP_DIR}/noisy-module.ts`;
217
await Bun.write(testModulePath, `
218
export function detect({ op }) {
···
196
197
describe('processBundles with module path', () => {
198
test('loads module and calls function', async () => {
199
+
// Create a test module with absolute path
200
+
const testModulePath = `${process.cwd()}/${TEMP_DIR}/test-module.ts`;
201
await Bun.write(testModulePath, `
202
export function detect({ op }) {
203
return op.did.startsWith('did:plc:') ? ['test'] : [];
···
212
});
213
214
test('supports silent mode', async () => {
215
+
// Create absolute path directly
216
const testModulePath = `${process.cwd()}/${TEMP_DIR}/noisy-module.ts`;
217
await Bun.write(testModulePath, `
218
export function detect({ op }) {