+11
-30
.tangled/workflows/examples.yml
+11
-30
.tangled/workflows/examples.yml
···
20
20
- name: "Install dependencies"
21
21
command: "bun install"
22
22
23
-
- name: "Setup test environment"
23
+
- name: "Verify examples compile"
24
24
command: |
25
-
mkdir -p $TEST_DATA_DIR/bundles
26
-
echo "Test environment created"
25
+
echo "Checking all examples compile..."
26
+
for file in examples/*.ts; do
27
+
echo "Checking $file..."
28
+
bun --print "import('./$file')" > /dev/null || echo "Note: $file may require data"
29
+
done
27
30
28
-
- name: "Test info example"
31
+
- name: "Check detect functions"
29
32
command: |
30
-
echo "Testing info.ts example..."
31
-
bun examples/info.ts || echo "Expected to fail without data"
32
-
33
-
- name: "Test stream example"
34
-
command: |
35
-
echo "Testing stream.ts example..."
36
-
bun examples/stream.ts || echo "Expected to fail without data"
37
-
38
-
- name: "Test analyze example"
39
-
command: |
40
-
echo "Testing analyze.ts example..."
41
-
bun examples/analyze.ts || echo "Expected to fail without data"
42
-
43
-
- name: "Test filter example"
44
-
command: |
45
-
echo "Testing filter.ts example..."
46
-
bun examples/filter.ts || echo "Expected to fail without data"
47
-
48
-
- name: "Test detect functions compile"
49
-
command: |
50
-
echo "Checking detect examples compile..."
51
-
bun --print 'import("./examples/detect.ts")'
52
-
bun --print 'import("./examples/detect-advanced.ts")'
53
-
54
-
- name: "Cleanup"
55
-
command: "rm -rf $TEST_DATA_DIR"
33
+
echo "Verifying detect functions..."
34
+
bun --print 'import("./examples/detect.ts")' > /dev/null
35
+
bun --print 'import("./examples/detect-advanced.ts")' > /dev/null
36
+
echo "✓ All detect functions compile"
+9
-28
.tangled/workflows/integration.yml
+9
-28
.tangled/workflows/integration.yml
···
10
10
dependencies:
11
11
nixpkgs:
12
12
- bun
13
-
- curl
14
13
15
14
environment:
16
15
TEST_REMOTE_URL: "https://plcbundle.atscan.net"
···
19
18
- name: "Install dependencies"
20
19
command: "bun install"
21
20
22
-
- name: "Test clone functionality"
21
+
- name: "Clone test bundles"
23
22
command: |
24
-
echo "Testing clone from remote repository..."
25
23
bun src/cli.ts clone \
26
24
--remote $TEST_REMOTE_URL \
27
25
--bundles 1-3 \
28
26
--threads 4 \
29
-
--dir ./test-integration-data
27
+
--dir ./test-data
30
28
31
29
- name: "Verify downloaded bundles"
32
30
command: |
33
-
echo "Verifying bundle integrity..."
34
-
bun src/cli.ts verify --bundle 1 --dir ./test-integration-data
35
-
bun src/cli.ts verify --bundle 2 --dir ./test-integration-data
36
-
bun src/cli.ts verify --bundle 3 --dir ./test-integration-data
31
+
bun src/cli.ts verify --bundle 1 --dir ./test-data
32
+
bun src/cli.ts verify --bundle 2 --dir ./test-data
33
+
bun src/cli.ts verify --bundle 3 --dir ./test-data
37
34
38
-
- name: "Test info command"
35
+
- name: "Test commands"
39
36
command: |
40
-
echo "Getting repository info..."
41
-
bun src/cli.ts info --dir ./test-integration-data
42
-
43
-
- name: "Test export command"
44
-
command: |
45
-
echo "Exporting operations from bundle 1..."
46
-
bun src/cli.ts export --bundle 1 --dir ./test-integration-data | head -n 10
47
-
48
-
- name: "Test detect with example"
49
-
command: |
50
-
echo "Testing detect functionality..."
51
-
bun src/cli.ts detect \
52
-
--detect ./examples/detect.ts \
53
-
--bundles 1 \
54
-
--dir ./test-integration-data \
55
-
| head -n 20
56
-
57
-
- name: "Cleanup test data"
58
-
command: "rm -rf ./test-integration-data"
37
+
bun src/cli.ts info --dir ./test-data
38
+
bun src/cli.ts export --bundle 1 --dir ./test-data | head -n 10
39
+
bun src/cli.ts detect --detect ./examples/detect.ts --bundles 1-2 --dir ./test-data | head -n 20
-32
.tangled/workflows/lint.yml
-32
.tangled/workflows/lint.yml
···
1
-
# Check code quality on every push
2
-
when:
3
-
- event: ["push", "pull_request"]
4
-
branch: ["main", "master", "develop"]
5
-
6
-
engine: "nixery"
7
-
8
-
dependencies:
9
-
nixpkgs:
10
-
- bun
11
-
12
-
steps:
13
-
- name: "Install dependencies"
14
-
command: "bun install"
15
-
16
-
- name: "Type check all files"
17
-
command: "bun run --bun tsc --noEmit"
18
-
19
-
- name: "Check for TypeScript errors in src"
20
-
command: |
21
-
echo "Checking src directory..."
22
-
bun run --bun tsc --noEmit src/**/*.ts || exit 1
23
-
24
-
- name: "Check for TypeScript errors in tests"
25
-
command: |
26
-
echo "Checking tests directory..."
27
-
bun run --bun tsc --noEmit tests/**/*.ts || exit 1
28
-
29
-
- name: "Verify examples compile"
30
-
command: |
31
-
echo "Checking examples..."
32
-
bun run --bun tsc --noEmit examples/**/*.ts || exit 1
-3
.tangled/workflows/test.yml
-3
.tangled/workflows/test.yml
+50
-13
README.md
+50
-13
README.md
···
1
1
# plcbundle-bun
2
2
3
-
⚡ plcbundle library built exclusively for **[Bun](https://bun.sh)**.
3
+
⚡ **Zero-dependency** [plcbundle](https://tangled.org/@atscan.net/plcbundle/blob/main/docs/specification.md) library written in **TypeScript** for **[Bun](https://bun.sh)**.
4
4
5
-
Leverages Bun's native features:
5
+
No external dependencies - just pure Bun runtime leveraging native features:
6
6
- 🗜️ Native `Bun.zstdDecompressSync()` - zero-copy decompression
7
7
- 🔐 Native `Bun.CryptoHasher` - SHA-256 verification
8
8
- 🚀 Native `Bun.file()` - optimized file I/O
9
9
- 🧵 Native `Worker` threads - parallel processing
10
10
- 📦 Native `Bun.resolveSync()` - module resolution
11
+
- 📘 Fully typed TypeScript - complete type safety
11
12
12
13
> **Note:** This is a Bun-native library. It does **not** work with Node.js.
13
14
14
15
## Requirements
15
16
16
-
- [Bun](https://bun.sh) >= 1.3
17
+
- [Bun](https://bun.sh) >= 1.2.17
17
18
18
19
```bash
19
20
# Install Bun if you haven't already
···
22
23
23
24
## Installation
24
25
26
+
### Global Installation (CLI)
27
+
25
28
```bash
29
+
bun i -g @atscan/plcbundle-bun
30
+
```
31
+
32
+
After global installation, the `plcbundle-bun` command is available:
33
+
34
+
```bash
35
+
plcbundle-bun --help
36
+
```
37
+
38
+
### Library Installation
39
+
40
+
```bash
41
+
bun add @atscan/plcbundle-bun
42
+
```
43
+
44
+
### Development
45
+
46
+
```bash
47
+
git clone https://tangled.org/@atscan.net/plcbundle-bun
48
+
cd plcbundle-bun
26
49
bun install
27
50
```
28
51
···
30
53
31
54
```bash
32
55
# Clone bundles from remote repository
33
-
bun src/cli.ts clone --remote https://plcbundle.atscan.net
56
+
plcbundle-bun clone --remote https://plcbundle.atscan.net
34
57
35
58
# Clone specific range with multiple threads
36
-
bun src/cli.ts clone --remote https://plcbundle.atscan.net --bundles 1-100 --threads 8
59
+
plcbundle-bun clone --remote https://plcbundle.atscan.net --bundles 1-100 --threads 8
37
60
38
61
# Show repository info
39
-
bun src/cli.ts info --dir ./bundles
62
+
plcbundle-bun info --dir ./bundles
40
63
41
64
# Detect/filter operations with custom function
42
-
bun src/cli.ts detect --detect ./examples/detect.ts --dir ./bundles
65
+
plcbundle-bun detect --detect ./examples/detect.ts --dir ./bundles
43
66
44
67
# Detect with range and threads
45
-
bun src/cli.ts detect --detect ./detect.ts --bundles 1-50 --threads 4
68
+
plcbundle-bun detect --detect ./detect.ts --bundles 1-50 --threads 4
46
69
47
70
# Verify bundle integrity
48
-
bun src/cli.ts verify --bundle 42 --dir ./bundles
71
+
plcbundle-bun verify --bundle 42 --dir ./bundles
49
72
50
73
# Export operations from bundle
51
-
bun src/cli.ts export --bundle 1 --dir ./bundles > ops.jsonl
74
+
plcbundle-bun export --bundle 1 --dir ./bundles > ops.jsonl
75
+
```
76
+
77
+
### Development CLI Usage
78
+
79
+
When developing locally, run commands directly:
80
+
81
+
```bash
82
+
bun src/cli.ts clone --remote https://plcbundle.atscan.net
83
+
bun src/cli.ts info --dir ./bundles
84
+
bun src/cli.ts detect --detect ./examples/detect.ts
52
85
```
53
86
54
87
## Library Usage
55
88
56
89
```typescript
57
-
import { PLCBundle } from './src';
90
+
import { PLCBundle } from "@atscan/plcbundle-bun";
58
91
59
92
// Initialize
60
93
const bundle = new PLCBundle('./bundles');
···
117
150
Then use it:
118
151
119
152
```bash
120
-
bun src/cli.ts detect --detect ./detect.ts
153
+
plcbundle-bun detect --detect ./detect.ts
121
154
```
122
155
123
156
## Why Bun?
124
157
125
158
This library uses Bun's native APIs for:
126
159
160
+
- **Zero dependencies** - Only requires Bun runtime, nothing else
161
+
- **Full TypeScript** - Complete type safety and IDE autocomplete
127
162
- **Native zstd decompression** - Built-in `zstdDecompressSync()`
128
163
- **Optimized file I/O** - `Bun.file()` with zero-copy operations
129
164
- **Fast crypto** - Native `CryptoHasher` for SHA-256
130
-
- **Instant startup** - No build step required
165
+
- **Instant startup** - No build step required, Bun runs TypeScript directly
131
166
- **Efficient parallelism** - Lightweight Workers
132
167
133
168
## Features
134
169
170
+
- 📦 **Zero dependencies** - pure Bun runtime only
171
+
- 📘 **TypeScript** - fully typed with complete type safety
135
172
- ⚡ **Bun-native** - leverages all native APIs
136
173
- 🔄 **Multi-threaded** - parallel downloads and processing
137
174
- 💾 **Auto-save progress** - every 5 seconds, no data loss
+4
-26
bun.lock
+4
-26
bun.lock
···
3
3
"workspaces": {
4
4
"": {
5
5
"name": "plcbundle",
6
+
"dependencies": {
7
+
"@jmespath-community/jmespath": "^1.3.0",
8
+
},
6
9
"devDependencies": {
7
10
"@types/bun": "^1.3.1",
8
11
},
9
-
"peerDependencies": {
10
-
"bun": ">=1.0.0",
11
-
},
12
12
},
13
13
},
14
14
"packages": {
15
-
"@oven/bun-darwin-aarch64": ["@oven/bun-darwin-aarch64@1.3.1", "", { "os": "darwin", "cpu": "arm64" }, "sha512-7Rap1BHNWqgnexc4wLjjdZeVRQKtk534iGuJ7qZ42i/q1B+cxJZ6zSnrFsYmo+zreH7dUyUXL3AHuXGrl2772Q=="],
16
-
17
-
"@oven/bun-darwin-x64": ["@oven/bun-darwin-x64@1.3.1", "", { "os": "darwin", "cpu": "x64" }, "sha512-wpqmgT/8w+tEr5YMGt1u1sEAMRHhyA2SKZddC6GCPasHxSqkCWOPQvYIHIApnTsoSsxhxP0x6Cpe93+4c7hq/w=="],
18
-
19
-
"@oven/bun-darwin-x64-baseline": ["@oven/bun-darwin-x64-baseline@1.3.1", "", { "os": "darwin", "cpu": "x64" }, "sha512-mJo715WvwEHmJ6khNymWyxi0QrFzU94wolsUmxolViNHrk+2ugzIkVIJhTnxf7pHnarxxHwyJ/kgatuV//QILQ=="],
20
-
21
-
"@oven/bun-linux-aarch64": ["@oven/bun-linux-aarch64@1.3.1", "", { "os": "linux", "cpu": "arm64" }, "sha512-ACn038SZL8del+sFnqCjf+haGB02//j2Ez491IMmPTvbv4a/D0iiNz9xiIB3ICbQd3EwQzi+Ut/om3Ba/KoHbQ=="],
22
-
23
-
"@oven/bun-linux-aarch64-musl": ["@oven/bun-linux-aarch64-musl@1.3.1", "", { "os": "linux", "cpu": "arm64" }, "sha512-gKU3Wv3BTG5VMjqMMnRwqU6tipCveE9oyYNt62efy6cQK3Vo1DOBwY2SmjbFw+yzj+Um20YoFOLGxghfQET4Ng=="],
24
-
25
-
"@oven/bun-linux-x64": ["@oven/bun-linux-x64@1.3.1", "", { "os": "linux", "cpu": "x64" }, "sha512-cAUeM3I5CIYlu5Ur52eCOGg9yfqibQd4lzt9G1/rA0ajqcnCBaTuekhUDZETJJf5H9QV+Gm46CqQg2DpdJzJsw=="],
26
-
27
-
"@oven/bun-linux-x64-baseline": ["@oven/bun-linux-x64-baseline@1.3.1", "", { "os": "linux", "cpu": "x64" }, "sha512-7+2aCrL81mtltZQbKdiPB58UL+Gr3DAIuPyUAKm0Ib/KG/Z8t7nD/eSMRY/q6b+NsAjYnVPiPwqSjC3edpMmmQ=="],
28
-
29
-
"@oven/bun-linux-x64-musl": ["@oven/bun-linux-x64-musl@1.3.1", "", { "os": "linux", "cpu": "x64" }, "sha512-8AgEAHyuJ5Jm9MUo1L53K1SRYu0bNGqV0E0L5rB5DjkteO4GXrnWGBT8qsuwuy7WMuCMY3bj64/pFjlRkZuiXw=="],
30
-
31
-
"@oven/bun-linux-x64-musl-baseline": ["@oven/bun-linux-x64-musl-baseline@1.3.1", "", { "os": "linux", "cpu": "x64" }, "sha512-tP0WWcAqrMayvkggOHBGBoyyoK+QHAqgRUyj1F6x5/udiqc9vCXmIt1tlydxYV/NvyvUAmJ7MWT0af44Xm2kJw=="],
32
-
33
-
"@oven/bun-windows-x64": ["@oven/bun-windows-x64@1.3.1", "", { "os": "win32", "cpu": "x64" }, "sha512-xdUjOZRq6PwPbbz4/F2QEMLBZwintGp7AS50cWxgkHnyp7Omz5eJfV6/vWtN4qwZIyR3V3DT/2oXsY1+7p3rtg=="],
34
-
35
-
"@oven/bun-windows-x64-baseline": ["@oven/bun-windows-x64-baseline@1.3.1", "", { "os": "win32", "cpu": "x64" }, "sha512-dcA+Kj7hGFrY3G8NWyYf3Lj3/GMViknpttWUf5pI6p6RphltZaoDu0lY5Lr71PkMdRZTwL2NnZopa/x/NWCdKA=="],
15
+
"@jmespath-community/jmespath": ["@jmespath-community/jmespath@1.3.0", "", { "bin": { "jp": "dist/cli.mjs" } }, "sha512-nzOrEdWKNpognj6CT+1Atr7gw0bqUC1KTBRyasBXS9NjFpz+og7LeFZrIQqV81GRcCzKa5H+DNipvv7NQK3GzA=="],
36
16
37
17
"@types/bun": ["@types/bun@1.3.1", "", { "dependencies": { "bun-types": "1.3.1" } }, "sha512-4jNMk2/K9YJtfqwoAa28c8wK+T7nvJFOjxI4h/7sORWcypRNxBpr+TPNaCfVWq70tLCJsqoFwcf0oI0JU/fvMQ=="],
38
18
39
19
"@types/node": ["@types/node@24.9.2", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-uWN8YqxXxqFMX2RqGOrumsKeti4LlmIMIyV0lgut4jx7KQBcBiW6vkDtIBvHnHIquwNfJhk8v2OtmO8zXWHfPA=="],
40
20
41
21
"@types/react": ["@types/react@19.2.2", "", { "dependencies": { "csstype": "^3.0.2" } }, "sha512-6mDvHUFSjyT2B2yeNx2nUgMxh9LtOWvkhIU3uePn2I2oyNymUAX1NIsdgviM4CH+JSrp2D2hsMvJOkxY+0wNRA=="],
42
-
43
-
"bun": ["bun@1.3.1", "", { "optionalDependencies": { "@oven/bun-darwin-aarch64": "1.3.1", "@oven/bun-darwin-x64": "1.3.1", "@oven/bun-darwin-x64-baseline": "1.3.1", "@oven/bun-linux-aarch64": "1.3.1", "@oven/bun-linux-aarch64-musl": "1.3.1", "@oven/bun-linux-x64": "1.3.1", "@oven/bun-linux-x64-baseline": "1.3.1", "@oven/bun-linux-x64-musl": "1.3.1", "@oven/bun-linux-x64-musl-baseline": "1.3.1", "@oven/bun-windows-x64": "1.3.1", "@oven/bun-windows-x64-baseline": "1.3.1" }, "os": [ "linux", "win32", "darwin", ], "cpu": [ "x64", "arm64", ], "bin": { "bun": "bin/bun.exe", "bunx": "bin/bunx.exe" } }, "sha512-enqkEb0RhNOgDzHQwv7uvnIhX3uSzmKzz779dL7kdH8SauyTdQvCz4O1UT2rU0UldQp2K9OlrJNdyDHayPEIvw=="],
44
22
45
23
"bun-types": ["bun-types@1.3.1", "", { "dependencies": { "@types/node": "*" }, "peerDependencies": { "@types/react": "^19" } }, "sha512-NMrcy7smratanWJ2mMXdpatalovtxVggkj11bScuWuiOoXTiKIu2eVS1/7qbyI/4yHedtsn175n4Sm4JcdHLXw=="],
46
24
+2
-1
examples/analyze.ts
+2
-1
examples/analyze.ts
···
19
19
withPds: 0,
20
20
};
21
21
22
-
await bundle.processBundles(1, 10, (op) => {
22
+
await bundle.processBundles(1, 10, (op, position, bundleNum, line) => {
23
23
stats.totalOps++;
24
24
stats.uniqueDids.add(op.did);
25
25
···
37
37
stats.withPds++;
38
38
}
39
39
});
40
+
40
41
41
42
console.log('📊 Analysis Results\n');
42
43
console.log(`Total operations: ${stats.totalOps.toLocaleString()}`);
+11
examples/detect-orig.js
+11
examples/detect-orig.js
+15
-3
examples/detect.ts
+15
-3
examples/detect.ts
···
1
1
/**
2
-
* Example detect function for use with plcbundle "detect" command
2
+
* Example detect function for filtering operations
3
+
*
4
+
* Usage:
5
+
* bun src/cli.ts detect --detect ./examples/detect.ts
3
6
*/
4
7
5
8
export function detect({ op }: { op: any }) {
6
9
const labels = [];
7
10
11
+
// Example: Detect test accounts
8
12
if (op.did.startsWith('did:plc:aa')) {
9
-
labels.push('test');
13
+
labels.push('test-account');
10
14
}
11
15
12
-
// Add your custom detection logic here
16
+
// Example: Detect by handle pattern
17
+
if (op.operation?.alsoKnownAs?.[0]?.includes('.test')) {
18
+
labels.push('test-handle');
19
+
}
20
+
21
+
// Example: Detect accounts created in 2024
22
+
if (op.createdAt?.match(/^\d{4}\-(03|06)/)) {
23
+
labels.push('created-march-june');
24
+
}
13
25
14
26
return labels;
15
27
}
+12
examples/flush-pds.ts
+12
examples/flush-pds.ts
···
1
+
// flushes all pds endpoints
2
+
3
+
const unique: Array<string> = []
4
+
5
+
export function process({ op }: { op: any }) {
6
+
7
+
const endpoint = op.operation.services?.atproto_pds?.endpoint
8
+
if (!unique.includes(endpoint)) {
9
+
console.log(endpoint)
10
+
unique.push(endpoint)
11
+
}
12
+
}
+31
examples/info.ts
+31
examples/info.ts
···
1
+
/**
2
+
* Example: Get repository information
3
+
*
4
+
* Usage:
5
+
* bun examples/info.ts
6
+
*/
7
+
8
+
import { PLCBundle } from '../src';
9
+
10
+
const bundle = new PLCBundle('./data/bundles');
11
+
12
+
const stats = await bundle.getStats();
13
+
14
+
console.log('📦 Repository Information\n');
15
+
console.log(`Version: ${stats.version}`);
16
+
console.log(`Last bundle: ${stats.lastBundle}`);
17
+
console.log(`Total bundles: ${stats.totalBundles}`);
18
+
console.log(`Total size: ${(stats.totalSize / 1e9).toFixed(2)} GB`);
19
+
console.log(`Updated: ${stats.updatedAt}`);
20
+
21
+
console.log('\n📊 Sample Bundle Info\n');
22
+
23
+
const metadata = await bundle.getMetadata(1);
24
+
if (metadata) {
25
+
console.log(`Bundle #${metadata.bundle_number}`);
26
+
console.log(` Operations: ${metadata.operation_count.toLocaleString()}`);
27
+
console.log(` Unique DIDs: ${metadata.did_count.toLocaleString()}`);
28
+
console.log(` Time range: ${metadata.start_time} → ${metadata.end_time}`);
29
+
console.log(` Size: ${(metadata.compressed_size / 1e6).toFixed(2)} MB (compressed)`);
30
+
console.log(` Size: ${(metadata.uncompressed_size / 1e6).toFixed(2)} MB (uncompressed)`);
31
+
}
-32
examples/library-use.js
-32
examples/library-use.js
···
1
-
import { PLCBundle } from './src';
2
-
3
-
// Create bundle reader
4
-
const plcbundle = new PLCBundle('./bundles');
5
-
6
-
// Get stats
7
-
const stats = await plcbundle.getStats();
8
-
console.log(stats);
9
-
10
-
// Stream operations
11
-
for await (const op of plcbundle.streamOperations(1)) {
12
-
console.log(op.did);
13
-
}
14
-
15
-
await bundle.processBundles(1, 100, (op, pos, num) => {
16
-
// Your logic
17
-
}, {
18
-
threads: 4,
19
-
onProgress: (stats) => {
20
-
console.log(`Processed ${stats.totalOps} ops`);
21
-
}
22
-
});
23
-
24
-
// Clone with progress
25
-
await bundle.clone('https://plcbundle.atscan.net', {
26
-
threads: 8,
27
-
bundles: '1-100',
28
-
verify: true,
29
-
onProgress: (stats) => {
30
-
console.log(`Downloaded ${stats.downloadedBundles}/${stats.totalBundles}`);
31
-
}
32
-
});
+1
-1
examples/parallel.ts
+1
-1
examples/parallel.ts
+28
examples/processor.ts
+28
examples/processor.ts
···
1
+
/**
2
+
* Example process function for general operation processing
3
+
*
4
+
* Usage:
5
+
* plcbundle-bun process ./examples/processor.ts
6
+
*/
7
+
8
+
export function process({ op, position, bundle, line }: {
9
+
op: any;
10
+
position: number;
11
+
bundle: number;
12
+
line: string;
13
+
}) {
14
+
// Example: Count operations by year
15
+
const year = op.createdAt.substring(0, 4);
16
+
17
+
// Example: Log specific operations
18
+
if (position % 1000 === 0) {
19
+
console.log(`Bundle ${bundle}, position ${position}: ${op.did}`);
20
+
}
21
+
22
+
// Example: Custom logic
23
+
if (op.operation?.alsoKnownAs?.length > 0) {
24
+
// Do something with operations that have handles
25
+
}
26
+
27
+
// No need to return anything
28
+
}
+23
examples/service-types.ts
+23
examples/service-types.ts
···
1
+
const counts: Record<string, number>= {}
2
+
3
+
export function process({ op }: { op: any }) {
4
+
5
+
if (!op.operation?.services) {
6
+
return
7
+
}
8
+
for (const key of Object.keys(op.operation.services)) {
9
+
if (!counts[key]) {
10
+
counts[key] = 1
11
+
} else {
12
+
counts[key] += 1
13
+
}
14
+
}
15
+
}
16
+
17
+
export function prepare() {
18
+
return { counts }
19
+
}
20
+
21
+
export function finalize(results: any, { aggregate }: any) {
22
+
console.log(Object.fromEntries(Object.entries(aggregate(results.map((r: any) => r.data.counts))).sort((a, b) => a[1] < b[1] ? 1 : -1)))
23
+
}
+6
jsr.json
+6
jsr.json
+12
-8
package.json
+12
-8
package.json
···
1
1
{
2
-
"name": "plcbundle-bun",
3
-
"version": "0.1.0",
2
+
"name": "@atscan/plcbundle-bun",
3
+
"version": "0.9.5",
4
4
"type": "module",
5
5
"description": "Bun library for working with DID PLC bundle archives (plcbundle)",
6
6
"main": "./src/index.ts",
7
7
"bin": {
8
-
"plcbundle": "./src/cli.ts"
8
+
"plcbundle-bun": "./src/cli.ts"
9
9
},
10
10
"exports": {
11
11
".": "./src/index.ts"
···
16
16
"scripts": {
17
17
"test": "bun test",
18
18
"test:watch": "bun test --watch",
19
-
"test:coverage": "bun test --coverage",
20
-
"cli": "bun src/cli.ts"
21
-
},
22
-
"peerDependencies": {
23
-
"bun": ">=1.0.0"
19
+
"tests:coverage": "bun test --coverage",
20
+
"cli": "bun src/cli.ts",
21
+
"publish": "bunx jsr publish"
24
22
},
25
23
"devDependencies": {
26
24
"@types/bun": "^1.3.1"
25
+
},
26
+
"publishConfig": {
27
+
"access": "public"
28
+
},
29
+
"dependencies": {
30
+
"@jmespath-community/jmespath": "^1.3.0"
27
31
}
28
32
}
+17
-9
src/cli.ts
+17
-9
src/cli.ts
···
2
2
3
3
import { clone } from './cmds/clone';
4
4
import { detect } from './cmds/detect';
5
+
import { processCmd } from './cmds/process';
5
6
import { info } from './cmds/info';
6
7
import { verify } from './cmds/verify';
7
8
import { exportCmd } from './cmds/export';
9
+
import { query } from './cmds/query';
8
10
9
11
const commands = {
10
12
clone,
11
13
detect,
14
+
process: processCmd,
15
+
query,
16
+
q: query, // Alias for query
12
17
info,
13
18
verify,
14
19
export: exportCmd,
···
21
26
bun cli <command> [options]
22
27
23
28
COMMANDS:
24
-
clone Clone bundles from a remote repository
25
-
detect Detect and filter operations using a custom function
26
-
info Show index or bundle information
27
-
verify Verify bundle integrity
28
-
export Export operations from bundle
29
-
help Show this help
29
+
clone Clone bundles from a remote repository
30
+
detect Detect and filter operations using a custom function
31
+
process Process operations with a custom function
32
+
query (q) Query operations using JMESPath or simple dot notation
33
+
info Show index or bundle information
34
+
verify Verify bundle integrity
35
+
export Export operations from bundle
36
+
help Show this help
30
37
31
38
Use 'bun cli <command> -h' for command-specific help
32
39
33
40
EXAMPLES:
34
41
bun cli clone --remote https://plcbundle.atscan.net
42
+
bun cli detect ./examples/detect.ts --bundles 1-100
43
+
bun cli q did --simple --bundles 1-1000
44
+
bun cli query 'operation.services.*.endpoint' --bundles 1-100
45
+
bun cli process ./my-processor.ts --threads 4
35
46
bun cli info --dir ./bundles
36
-
bun cli detect --detect ./examples/detect.ts
37
-
bun cli detect --detect ./examples/detect.ts --bundles 1-100
38
-
bun cli detect --detect ./examples/detect.ts --bundles 42 --threads 4
39
47
bun cli verify --bundle 42
40
48
bun cli export --bundle 1 > ops.jsonl
41
49
`);
+235
src/cmds/common.ts
+235
src/cmds/common.ts
···
1
+
import { PLCBundle } from '../plcbundle';
2
+
import type { ProcessStats } from '../types';
3
+
import { parseArgs } from 'util';
4
+
5
+
export interface ProcessingOptions {
6
+
dir: string;
7
+
start: number;
8
+
end: number;
9
+
modulePath: string;
10
+
threads: number;
11
+
silent: boolean;
12
+
flush?: boolean;
13
+
mode: 'detect' | 'process';
14
+
noProgress?: boolean;
15
+
onMatch?: (match: any, matchCount: number, matchedBytes: number) => void;
16
+
}
17
+
18
+
/**
19
+
* Common processing logic for both detect and process commands
20
+
*/
21
+
export async function processOperations(options: ProcessingOptions) {
22
+
const { dir, start, end, modulePath, threads, silent, flush, mode, noProgress, onMatch } = options;
23
+
24
+
const bundle = new PLCBundle(dir);
25
+
26
+
// Save original console
27
+
const originalConsole = {
28
+
log: console.log,
29
+
error: console.error,
30
+
warn: console.warn,
31
+
info: console.info,
32
+
debug: console.debug,
33
+
};
34
+
35
+
// Override console if silent
36
+
if (silent) {
37
+
console.log = () => {};
38
+
console.error = () => {};
39
+
console.warn = () => {};
40
+
console.info = () => {};
41
+
console.debug = () => {};
42
+
}
43
+
44
+
try {
45
+
originalConsole.error(`Processing bundles ${start}-${end} from ${dir}${threads > 1 ? ` (${threads} threads)` : ''}${silent ? ' (silent)' : ''}\n`);
46
+
47
+
if (mode === 'detect') {
48
+
originalConsole.log('bundle,position,cid,size,confidence,labels');
49
+
}
50
+
51
+
const startTime = Date.now();
52
+
let matchCount = 0;
53
+
let matchedBytes = 0;
54
+
55
+
if (threads > 1) {
56
+
// Multi-threaded mode
57
+
const result: any = await bundle.processBundles(start, end, {
58
+
module: modulePath,
59
+
threads,
60
+
silent,
61
+
flush,
62
+
onProgress: noProgress ? undefined : (progressStats: ProcessStats) => { // Check noProgress
63
+
const elapsed = (Date.now() - startTime) / 1000;
64
+
const opsPerSec = (progressStats.totalOps / elapsed).toFixed(0);
65
+
const mbPerSec = (progressStats.totalBytes / elapsed / 1e6).toFixed(1);
66
+
originalConsole.error(`Processed ${progressStats.totalOps} ops | ${opsPerSec} ops/sec | ${mbPerSec} MB/s\r`);
67
+
},
68
+
onMatch: flush && onMatch ? (match) => {
69
+
matchCount++;
70
+
matchedBytes += match.size;
71
+
onMatch(match, matchCount, matchedBytes);
72
+
} : undefined,
73
+
});
74
+
75
+
// Output buffered matches (if not flushed)
76
+
if (!flush && result.matches && onMatch) {
77
+
for (const match of result.matches) {
78
+
matchCount++;
79
+
matchedBytes += match.size;
80
+
onMatch(match, matchCount, matchedBytes);
81
+
}
82
+
}
83
+
84
+
const elapsed = (Date.now() - startTime) / 1000;
85
+
const opsPerSec = (result.totalOps / elapsed).toFixed(0);
86
+
const mbPerSec = (result.totalBytes / elapsed / 1e6).toFixed(1);
87
+
88
+
originalConsole.error('\n');
89
+
originalConsole.error(`✓ ${mode === 'detect' ? 'Detection' : 'Processing'} complete`);
90
+
originalConsole.error(` Total operations: ${result.totalOps.toLocaleString()}`);
91
+
92
+
if (mode === 'detect') {
93
+
originalConsole.error(` Matches found: ${matchCount.toLocaleString()} (${(matchCount/result.totalOps*100).toFixed(2)}%)`);
94
+
originalConsole.error(` Matched size: ${(matchedBytes / 1e6).toFixed(1)} MB (${(matchedBytes/result.totalBytes*100).toFixed(2)}%)`);
95
+
}
96
+
97
+
originalConsole.error(` Total size: ${(result.totalBytes / 1e6).toFixed(1)} MB`);
98
+
originalConsole.error('');
99
+
originalConsole.error(` Time elapsed: ${elapsed.toFixed(2)}s`);
100
+
originalConsole.error(` Throughput: ${opsPerSec} ops/sec | ${mbPerSec} MB/s`);
101
+
originalConsole.error(` Threads: ${threads}`);
102
+
} else {
103
+
// Single-threaded mode
104
+
const mod = await import(modulePath);
105
+
const userFn = mode === 'detect' ? (mod.detect || mod.default) : (mod.process || mod.default);
106
+
107
+
const finalStats = await bundle.processBundles(
108
+
start,
109
+
end,
110
+
(op, position, bundleNum, line) => {
111
+
if (mode === 'detect') {
112
+
const labels = userFn({ op });
113
+
114
+
if (labels && labels.length > 0) {
115
+
matchCount++;
116
+
matchedBytes += line.length;
117
+
if (onMatch) {
118
+
onMatch({
119
+
bundle: bundleNum,
120
+
position,
121
+
cid: op.cid.slice(-4),
122
+
size: line.length,
123
+
labels
124
+
}, matchCount, matchedBytes);
125
+
}
126
+
}
127
+
} else {
128
+
// Process mode - just call function
129
+
userFn({ op, position, bundle: bundleNum, line });
130
+
}
131
+
},
132
+
{
133
+
onProgress: noProgress ? undefined : (progressStats: ProcessStats) => { // Check noProgress
134
+
const elapsed = (Date.now() - startTime) / 1000;
135
+
const opsPerSec = (progressStats.totalOps / elapsed).toFixed(0);
136
+
const mbPerSec = (progressStats.totalBytes / elapsed / 1e6).toFixed(1);
137
+
originalConsole.error(`Processed ${progressStats.totalOps} ops | ${opsPerSec} ops/sec | ${mbPerSec} MB/s\r`);
138
+
},
139
+
}
140
+
);
141
+
142
+
const elapsed = (Date.now() - startTime) / 1000;
143
+
const opsPerSec = (finalStats.totalOps / elapsed).toFixed(0);
144
+
const mbPerSec = (finalStats.totalBytes / elapsed / 1e6).toFixed(1);
145
+
146
+
originalConsole.error('\n');
147
+
originalConsole.error(`✓ ${mode === 'detect' ? 'Detection' : 'Processing'} complete`);
148
+
originalConsole.error(` Total operations: ${finalStats.totalOps.toLocaleString()}`);
149
+
150
+
if (mode === 'detect') {
151
+
originalConsole.error(` Matches found: ${matchCount.toLocaleString()} (${(matchCount/finalStats.totalOps*100).toFixed(2)}%)`);
152
+
originalConsole.error(` Matched size: ${(matchedBytes / 1e6).toFixed(1)} MB (${(matchedBytes/finalStats.totalBytes*100).toFixed(2)}%)`);
153
+
}
154
+
155
+
originalConsole.error(` Total size: ${(finalStats.totalBytes / 1e6).toFixed(1)} MB`);
156
+
originalConsole.error('');
157
+
originalConsole.error(` Time elapsed: ${elapsed.toFixed(2)}s`);
158
+
originalConsole.error(` Throughput: ${opsPerSec} ops/sec | ${mbPerSec} MB/s`);
159
+
if (threads > 1) {
160
+
originalConsole.error(` Threads: ${threads}`);
161
+
}
162
+
}
163
+
} finally {
164
+
// Restore console
165
+
console.log = originalConsole.log;
166
+
console.error = originalConsole.error;
167
+
console.warn = originalConsole.warn;
168
+
console.info = originalConsole.info;
169
+
console.debug = originalConsole.debug;
170
+
}
171
+
}
172
+
173
+
/**
174
+
* Common argument parsing for process/detect commands
175
+
*/
176
+
export function parseProcessArgs(args: string[]) {
177
+
const { values, positionals } = parseArgs({
178
+
args,
179
+
options: {
180
+
dir: { type: 'string', default: './' },
181
+
bundles: { type: 'string' },
182
+
threads: { type: 'string', default: '1' },
183
+
silent: { type: 'boolean', default: false },
184
+
s: { type: 'boolean', default: false },
185
+
flush: { type: 'boolean', default: false },
186
+
'no-progress': { type: 'boolean', default: false }, // Add this
187
+
},
188
+
strict: false,
189
+
allowPositionals: true,
190
+
});
191
+
192
+
return { values, positionals };
193
+
}
194
+
195
+
/**
196
+
* Parse bundle selection from values
197
+
*/
198
+
export async function parseBundleSelection(
199
+
values: any,
200
+
bundle: PLCBundle
201
+
): Promise<{ start: number; end: number }> {
202
+
const stats = await bundle.getStats();
203
+
204
+
let start: number, end: number;
205
+
206
+
if (values.bundles && typeof values.bundles === 'string') {
207
+
const bundleSpec = values.bundles;
208
+
209
+
if (bundleSpec.includes('-')) {
210
+
const [startStr, endStr] = bundleSpec.split('-');
211
+
start = parseInt(startStr.trim());
212
+
end = parseInt(endStr.trim());
213
+
214
+
if (isNaN(start) || isNaN(end)) {
215
+
throw new Error(`Invalid bundle range: ${bundleSpec}`);
216
+
}
217
+
} else {
218
+
start = parseInt(bundleSpec);
219
+
end = start;
220
+
221
+
if (isNaN(start)) {
222
+
throw new Error(`Invalid bundle number: ${bundleSpec}`);
223
+
}
224
+
}
225
+
} else {
226
+
start = 1;
227
+
end = stats.lastBundle;
228
+
}
229
+
230
+
if (start < 1 || end > stats.lastBundle || start > end) {
231
+
throw new Error(`Invalid bundle range ${start}-${end} (available: 1-${stats.lastBundle})`);
232
+
}
233
+
234
+
return { start, end };
235
+
}
+37
-111
src/cmds/detect.ts
+37
-111
src/cmds/detect.ts
···
1
-
import { parseArgs } from 'util';
1
+
import { parseProcessArgs, parseBundleSelection, processOperations } from './common';
2
2
import { PLCBundle } from '../plcbundle';
3
-
import type { ProcessStats } from '../types';
4
3
5
4
export async function detect(args: string[]) {
6
5
if (args.includes('-h') || args.includes('--help')) {
7
6
console.log(`
8
7
detect - Detect and filter operations using a custom function
9
8
9
+
USAGE:
10
+
plcbundle-bun detect <module> [options]
11
+
12
+
ARGUMENTS:
13
+
<module> Path to detect function module (required)
14
+
10
15
OPTIONS:
11
16
--dir <path> Bundle directory (default: ./)
12
17
--bundles <spec> Bundle selection: number (42) or range (1-50)
13
-
If not specified, processes all available bundles
14
-
--detect <path> Path to detect function module (required)
15
18
--threads <num> Number of worker threads (default: 1)
19
+
--flush Output matches immediately (unsorted)
20
+
-s, --silent Suppress all console output from detect script
21
+
--no-progress Disable progress output (default: false)
16
22
17
23
EXAMPLES:
18
-
bun cli detect --detect ./detect.ts # All bundles
19
-
bun cli detect --detect ./detect.ts --bundles 42 # Single bundle
20
-
bun cli detect --detect ./detect.ts --bundles 1-50 # Range
21
-
bun cli detect --detect ./detect.ts --threads 4 # Multi-threaded
24
+
plcbundle-bun detect ./detect.ts
25
+
plcbundle-bun detect ./detect.ts --bundles 1-50 --threads 4
26
+
plcbundle-bun detect ./detect.ts --flush --silent --no-progress
22
27
`);
23
28
return;
24
29
}
25
30
26
-
const { values } = parseArgs({
27
-
args,
28
-
options: {
29
-
dir: { type: 'string', default: './' },
30
-
bundles: { type: 'string' },
31
-
detect: { type: 'string' },
32
-
threads: { type: 'string', default: '1' },
33
-
},
34
-
strict: false,
35
-
});
36
-
37
-
const dir = (values.dir as string) || './';
38
-
const threads = parseInt((values.threads as string) || '1');
39
-
40
-
if (!values.detect || typeof values.detect !== 'string') {
41
-
console.error('Error: --detect is required');
42
-
process.exit(1);
43
-
}
44
-
45
-
// Load bundle instance to get index
46
-
const bundle = new PLCBundle(dir);
47
-
const stats = await bundle.getStats();
48
-
49
-
// Parse bundle selection
50
-
let start: number, end: number;
51
-
52
-
if (values.bundles && typeof values.bundles === 'string') {
53
-
const bundleSpec = values.bundles;
54
-
55
-
if (bundleSpec.includes('-')) {
56
-
const [startStr, endStr] = bundleSpec.split('-');
57
-
start = parseInt(startStr.trim());
58
-
end = parseInt(endStr.trim());
59
-
60
-
if (isNaN(start) || isNaN(end)) {
61
-
console.error(`Error: Invalid bundle range: ${bundleSpec}`);
62
-
process.exit(1);
63
-
}
64
-
} else {
65
-
start = parseInt(bundleSpec);
66
-
end = start;
67
-
68
-
if (isNaN(start)) {
69
-
console.error(`Error: Invalid bundle number: ${bundleSpec}`);
70
-
process.exit(1);
71
-
}
72
-
}
73
-
} else {
74
-
start = 1;
75
-
end = stats.lastBundle;
76
-
}
31
+
const { values, positionals } = parseProcessArgs(args);
32
+
const modulePath = positionals[0];
77
33
78
-
// Validate range
79
-
if (start < 1 || end > stats.lastBundle || start > end) {
80
-
console.error(`Error: Invalid bundle range ${start}-${end} (available: 1-${stats.lastBundle})`);
34
+
if (!modulePath) {
35
+
console.error('Error: module path is required');
36
+
console.error('Usage: plcbundle-bun detect <module> [options]');
81
37
process.exit(1);
82
38
}
83
39
84
-
// Resolve and load detect function
85
-
const detectPath = Bun.resolveSync(values.detect, process.cwd());
86
-
const mod = await import(detectPath);
87
-
const detectFn = mod.detect || mod.default;
40
+
const dir = (values.dir as string) || './';
41
+
const threads = parseInt((values.threads as string) || '1');
42
+
const silent = Boolean(values.silent || values.s);
43
+
const flush = Boolean(values.flush);
44
+
const noProgress = Boolean(values['no-progress']); // Add this
88
45
89
-
console.error(`Processing bundles ${start}-${end} from ${dir}${threads > 1 ? ` (${threads} threads)` : ''}\n`);
90
-
console.log('bundle,position,cid,size,confidence,labels');
46
+
const bundle = new PLCBundle(dir);
47
+
const { start, end } = await parseBundleSelection(values, bundle);
48
+
const resolvedPath = Bun.resolveSync(modulePath, process.cwd());
91
49
92
-
let matchCount = 0;
93
-
let matchedBytes = 0;
94
-
const startTime = Date.now();
95
-
96
-
// Process bundles with progress
97
-
const finalStats = await bundle.processBundles(
50
+
await processOperations({
51
+
dir,
98
52
start,
99
53
end,
100
-
(op, position, bundleNum) => {
101
-
const opSize = JSON.stringify(op).length;
102
-
const labels = detectFn({ op });
103
-
104
-
if (labels && labels.length > 0) {
105
-
matchCount++;
106
-
matchedBytes += opSize;
107
-
const cidShort = op.cid.slice(-4);
108
-
console.log(`${bundleNum},${position},${cidShort},${opSize},0.95,${labels.join(';')}`);
109
-
}
54
+
modulePath: resolvedPath,
55
+
threads,
56
+
silent,
57
+
flush,
58
+
noProgress, // Pass it
59
+
mode: 'detect',
60
+
onMatch: (match) => {
61
+
console.log(`${match.bundle},${match.position},${match.cid},${match.size},0.95,${match.labels.join(';')}`);
110
62
},
111
-
{
112
-
threads,
113
-
onProgress: (progressStats: ProcessStats) => {
114
-
const elapsed = (Date.now() - startTime) / 1000;
115
-
const opsPerSec = (progressStats.totalOps / elapsed).toFixed(0);
116
-
const mbPerSec = (progressStats.totalBytes / elapsed / 1e6).toFixed(1);
117
-
console.error(`Processed ${progressStats.totalOps} ops | ${opsPerSec} ops/sec | ${mbPerSec} MB/s\r`);
118
-
},
119
-
}
120
-
);
121
-
122
-
const elapsed = (Date.now() - startTime) / 1000;
123
-
const opsPerSec = (finalStats.totalOps / elapsed).toFixed(0);
124
-
const mbPerSec = (finalStats.totalBytes / elapsed / 1e6).toFixed(1);
125
-
126
-
console.error('\n');
127
-
console.error('✓ Detection complete');
128
-
console.error(` Total operations: ${finalStats.totalOps.toLocaleString()}`);
129
-
console.error(` Matches found: ${matchCount.toLocaleString()} (${(matchCount/finalStats.totalOps*100).toFixed(2)}%)`);
130
-
console.error(` Total size: ${(finalStats.totalBytes / 1e6).toFixed(1)} MB`);
131
-
console.error(` Matched size: ${(matchedBytes / 1e6).toFixed(1)} MB (${(matchedBytes/finalStats.totalBytes*100).toFixed(2)}%)`);
132
-
console.error('');
133
-
console.error(` Time elapsed: ${elapsed.toFixed(2)}s`);
134
-
console.error(` Throughput: ${opsPerSec} ops/sec | ${mbPerSec} MB/s`);
135
-
if (threads > 1) {
136
-
console.error(` Threads: ${threads}`);
137
-
}
63
+
});
138
64
}
+64
src/cmds/process.ts
+64
src/cmds/process.ts
···
1
+
import { exit } from 'process';
2
+
import { parseProcessArgs, parseBundleSelection, processOperations } from './common';
3
+
import { PLCBundle } from '../plcbundle';
4
+
5
+
export async function processCmd(args: string[]) {
6
+
if (args.includes('-h') || args.includes('--help')) {
7
+
console.log(`
8
+
process - Process operations with a custom function
9
+
10
+
USAGE:
11
+
plcbundle-bun process <module> [options]
12
+
13
+
ARGUMENTS:
14
+
<module> Path to process function module (required)
15
+
16
+
OPTIONS:
17
+
--dir <path> Bundle directory (default: ./)
18
+
--bundles <spec> Bundle selection: number (42) or range (1-50)
19
+
--threads <num> Number of worker threads (default: 1)
20
+
-s, --silent Suppress all console output from process script
21
+
--no-progress Disable progress output (default: false)
22
+
23
+
EXAMPLES:
24
+
plcbundle-bun process ./my-processor.ts
25
+
plcbundle-bun process ./my-processor.ts --bundles 1-50 --threads 4
26
+
plcbundle-bun process ./my-processor.ts --no-progress
27
+
28
+
PROCESS FUNCTION:
29
+
export function process({ op, position, bundle, line }) {
30
+
// Your logic here
31
+
}
32
+
`);
33
+
return;
34
+
}
35
+
36
+
const { values, positionals } = parseProcessArgs(args);
37
+
const modulePath = positionals[0];
38
+
39
+
if (!modulePath) {
40
+
console.error('Error: module path is required');
41
+
console.error('Usage: plcbundle-bun process <module> [options]');
42
+
process.exit(1);
43
+
}
44
+
45
+
const dir = (values.dir as string) || './';
46
+
const threads = parseInt((values.threads as string) || '1');
47
+
const silent = Boolean(values.silent || values.s);
48
+
const noProgress = Boolean(values['no-progress']); // Add this
49
+
50
+
const bundle = new PLCBundle(dir);
51
+
const { start, end } = await parseBundleSelection(values, bundle);
52
+
const resolvedPath = Bun.resolveSync(modulePath, process.cwd());
53
+
54
+
await processOperations({
55
+
dir,
56
+
start,
57
+
end,
58
+
modulePath: resolvedPath,
59
+
threads,
60
+
silent,
61
+
noProgress, // Pass it
62
+
mode: 'process',
63
+
});
64
+
}
+399
src/cmds/query.ts
+399
src/cmds/query.ts
···
1
+
import { parseArgs } from 'util';
2
+
import { PLCBundle } from '../plcbundle';
3
+
import { search as jmespathSearch } from '@jmespath-community/jmespath';
4
+
5
+
export async function query(args: string[]) {
6
+
if (args.includes('-h') || args.includes('--help')) {
7
+
console.log(`
8
+
query - Query operations using JMESPath or simple dot notation
9
+
10
+
USAGE:
11
+
plcbundle-bun query <expression> [options]
12
+
13
+
ARGUMENTS:
14
+
<expression> Query expression (required)
15
+
16
+
OPTIONS:
17
+
--dir <path> Bundle directory (default: ./)
18
+
--bundles <spec> Bundle selection: number (42) or range (1-50)
19
+
--threads <num> Number of worker threads (default: 0 = auto-detect CPU cores)
20
+
--format <type> Output format: jsonl|count (default: jsonl)
21
+
--limit <num> Limit number of results
22
+
--simple Use fast simple dot notation (10x faster for basic queries)
23
+
--no-progress Disable progress output
24
+
25
+
QUERY MODES:
26
+
27
+
SIMPLE (--simple) - Ultra-fast for basic property access:
28
+
did Direct property (fastest!)
29
+
operation.services.atproto_pds Nested property
30
+
alsoKnownAs[0] Array indexing
31
+
operation.alsoKnownAs[0].name Combined access
32
+
33
+
JMESPATH (default) - Full query power:
34
+
operation.services.*.endpoint Wildcard
35
+
foo[?age > \`30\`] Filtering
36
+
{did: did, handle: alsoKnownAs[0]} Projection
37
+
operation.services[*].endpoint All endpoints
38
+
39
+
EXAMPLES:
40
+
# Ultra-fast simple queries (recommended for basic property access)
41
+
bun cli q 'did' --simple --bundles 1-10000
42
+
bun cli q 'operation.services.atproto_pds.endpoint' --simple
43
+
44
+
# Complex JMESPath queries (default)
45
+
bun cli q 'operation.services.*.endpoint' --bundles 1-100
46
+
bun cli q '[?operation.alsoKnownAs]' --bundles 1-100
47
+
`);
48
+
return;
49
+
}
50
+
51
+
const { values, positionals } = parseArgs({
52
+
args,
53
+
options: {
54
+
dir: { type: 'string', default: './' },
55
+
bundles: { type: 'string' },
56
+
threads: { type: 'string', default: '0' },
57
+
format: { type: 'string', default: 'jsonl' },
58
+
limit: { type: 'string' },
59
+
simple: { type: 'boolean', default: false },
60
+
'no-progress': { type: 'boolean', default: false },
61
+
},
62
+
strict: false,
63
+
allowPositionals: true,
64
+
});
65
+
66
+
const expression = positionals[0];
67
+
if (!expression) {
68
+
console.error('Error: Query expression is required');
69
+
process.exit(1);
70
+
}
71
+
72
+
const dir = (values.dir as string) || './';
73
+
let threads = parseInt((values.threads as string) || '0');
74
+
75
+
if (threads === 0) {
76
+
threads = navigator.hardwareConcurrency || 4;
77
+
}
78
+
79
+
const format = (values.format as string) || 'jsonl';
80
+
const limit = values.limit ? parseInt(values.limit as string) : undefined;
81
+
const useSimple = Boolean(values.simple);
82
+
const noProgress = Boolean(values['no-progress']);
83
+
84
+
const bundle = new PLCBundle(dir);
85
+
const stats = await bundle.getStats();
86
+
87
+
let start: number, end: number;
88
+
89
+
if (values.bundles && typeof values.bundles === 'string') {
90
+
const bundleSpec = values.bundles;
91
+
if (bundleSpec.includes('-')) {
92
+
const [startStr, endStr] = bundleSpec.split('-');
93
+
start = parseInt(startStr.trim());
94
+
end = parseInt(endStr.trim());
95
+
if (isNaN(start) || isNaN(end)) {
96
+
throw new Error(`Invalid bundle range: ${bundleSpec}`);
97
+
}
98
+
} else {
99
+
start = parseInt(bundleSpec);
100
+
end = start;
101
+
if (isNaN(start)) {
102
+
throw new Error(`Invalid bundle number: ${bundleSpec}`);
103
+
}
104
+
}
105
+
} else {
106
+
start = 1;
107
+
end = stats.lastBundle;
108
+
}
109
+
110
+
if (start < 1 || end > stats.lastBundle || start > end) {
111
+
throw new Error(`Invalid bundle range ${start}-${end} (available: 1-${stats.lastBundle})`);
112
+
}
113
+
114
+
const queryType = useSimple ? 'simple' : 'JMESPath';
115
+
const threadInfo = threads > 1 ? ` (${threads} threads)` : '';
116
+
console.error(`Querying bundles ${start}-${end}${threadInfo} with ${queryType}: ${expression}\n`);
117
+
118
+
const startTime = Date.now();
119
+
let matchCount = 0;
120
+
let totalOps = 0;
121
+
let totalBytes = 0;
122
+
const totalBundles = end - start + 1;
123
+
let shouldStop = false;
124
+
125
+
// Compile simple expression and detect fast path
126
+
let queryFn: (op: any) => any;
127
+
128
+
if (useSimple) {
129
+
const compiled = compileSimplePath(expression);
130
+
131
+
// Ultra-fast path for single property access (e.g., "did")
132
+
if (compiled.segments.length === 1 && compiled.segments[0].type === 'property') {
133
+
const prop = compiled.segments[0].value as string;
134
+
queryFn = (op) => op[prop];
135
+
} else {
136
+
// Fast path for dot notation
137
+
queryFn = (op) => querySimplePath(op, compiled);
138
+
}
139
+
} else {
140
+
// JMESPath
141
+
queryFn = (op) => jmespathSearch(op, expression);
142
+
}
143
+
144
+
if (threads === 1) {
145
+
// Single-threaded
146
+
for (let bundleNum = start; bundleNum <= end; bundleNum++) {
147
+
for await (const { op, line } of bundle.streamOperations(bundleNum)) {
148
+
totalOps++;
149
+
totalBytes += line.length;
150
+
151
+
if (!noProgress && totalOps % 5000 === 0) {
152
+
const elapsed = (Date.now() - startTime) / 1000;
153
+
const bundlesCompleted = bundleNum - start + 1;
154
+
const progress = bundlesCompleted / totalBundles;
155
+
const mbPerSec = totalBytes / elapsed / 1e6;
156
+
const eta = bundlesCompleted > 0 ? ((totalBundles - bundlesCompleted) / bundlesCompleted) * elapsed : 0;
157
+
renderProgressBar(elapsed, bundlesCompleted, totalBundles, progress, matchCount, mbPerSec, eta);
158
+
}
159
+
160
+
try {
161
+
const result = queryFn(op);
162
+
163
+
if (result !== null && result !== undefined) {
164
+
matchCount++;
165
+
if (format !== 'count') {
166
+
console.log(JSON.stringify(result));
167
+
}
168
+
if (limit && matchCount >= limit) {
169
+
shouldStop = true;
170
+
break;
171
+
}
172
+
}
173
+
} catch (error) {
174
+
// Skip invalid operations
175
+
}
176
+
}
177
+
if (shouldStop) break;
178
+
}
179
+
} else {
180
+
// Multi-threaded
181
+
const bundlesPerThread = Math.ceil(totalBundles / threads);
182
+
const workerPath = new URL('../worker.ts', import.meta.url).pathname;
183
+
const workers: Worker[] = [];
184
+
const workerStats: Array<{
185
+
totalOps: number;
186
+
totalBytes: number;
187
+
matchCount: number;
188
+
bundlesCompleted: number;
189
+
threadStart: number;
190
+
}> = [];
191
+
192
+
const workerPromises = [];
193
+
194
+
for (let i = 0; i < threads; i++) {
195
+
const threadStart = start + i * bundlesPerThread;
196
+
const threadEnd = Math.min(threadStart + bundlesPerThread - 1, end);
197
+
if (threadStart > end) break;
198
+
199
+
const worker = new Worker(workerPath);
200
+
workers.push(worker);
201
+
workerStats[i] = { totalOps: 0, totalBytes: 0, matchCount: 0, bundlesCompleted: 0, threadStart };
202
+
203
+
const promise = new Promise<any>((resolve) => {
204
+
worker.onmessage = (event) => {
205
+
const msg = event.data;
206
+
207
+
if (msg.type === 'progress') {
208
+
workerStats[i].totalOps = msg.totalOps;
209
+
workerStats[i].totalBytes = msg.totalBytes;
210
+
workerStats[i].matchCount = msg.matchCount;
211
+
workerStats[i].bundlesCompleted = Math.max(0, msg.currentBundle - workerStats[i].threadStart + 1);
212
+
213
+
let aggOps = 0, aggBytes = 0, aggMatches = 0, totalBundlesCompleted = 0;
214
+
for (const ws of workerStats) {
215
+
aggOps += ws.totalOps;
216
+
aggBytes += ws.totalBytes;
217
+
aggMatches += ws.matchCount;
218
+
totalBundlesCompleted += ws.bundlesCompleted;
219
+
}
220
+
221
+
const progress = Math.min(totalBundlesCompleted / totalBundles, 1.0);
222
+
if (!noProgress) {
223
+
const elapsed = (Date.now() - startTime) / 1000;
224
+
const mbPerSec = aggBytes / elapsed / 1e6;
225
+
const eta = totalBundlesCompleted > 0 ? ((totalBundles - totalBundlesCompleted) / totalBundlesCompleted) * elapsed : 0;
226
+
renderProgressBar(elapsed, totalBundlesCompleted, totalBundles, progress, aggMatches, mbPerSec, eta);
227
+
}
228
+
} else if (msg.type === 'match-batch') {
229
+
for (const match of msg.matches) {
230
+
matchCount++;
231
+
if (format !== 'count') {
232
+
console.log(JSON.stringify(match.result));
233
+
}
234
+
if (limit && matchCount >= limit) {
235
+
shouldStop = true;
236
+
workers.forEach(w => w.terminate());
237
+
break;
238
+
}
239
+
}
240
+
} else if (msg.type === 'result') {
241
+
totalOps += msg.totalOps;
242
+
totalBytes += msg.totalBytes;
243
+
resolve(msg);
244
+
}
245
+
};
246
+
});
247
+
248
+
workerPromises.push(promise);
249
+
worker.postMessage({
250
+
dir: bundle['dir'],
251
+
start: threadStart,
252
+
end: threadEnd,
253
+
expression: expression,
254
+
useSimple: useSimple,
255
+
flush: true,
256
+
mode: 'query',
257
+
});
258
+
}
259
+
260
+
await Promise.all(workerPromises);
261
+
workers.forEach(w => w.terminate());
262
+
}
263
+
264
+
const elapsed = (Date.now() - startTime) / 1000;
265
+
266
+
if (!noProgress) {
267
+
const mbPerSec = totalBytes / elapsed / 1e6;
268
+
renderProgressBar(elapsed, totalBundles, totalBundles, 1.0, matchCount, mbPerSec, 0);
269
+
console.error('\n');
270
+
}
271
+
272
+
if (format === 'count') {
273
+
console.log(matchCount);
274
+
}
275
+
276
+
console.error('');
277
+
console.error(`✓ Query complete`);
278
+
console.error(` Total operations: ${totalOps.toLocaleString()}`);
279
+
console.error(` Matches found: ${matchCount.toLocaleString()} (${totalOps > 0 ? ((matchCount/totalOps)*100).toFixed(2) : '0.00'}%)`);
280
+
console.error(` Total bytes: ${(totalBytes / 1e6).toFixed(1)} MB`);
281
+
console.error(` Time elapsed: ${elapsed.toFixed(2)}s`);
282
+
console.error(` Throughput: ${(totalOps / elapsed).toFixed(0)} ops/sec | ${(totalBytes / elapsed / 1e6).toFixed(1)} MB/s`);
283
+
if (threads > 1) {
284
+
console.error(` Threads: ${threads}`);
285
+
}
286
+
}
287
+
288
+
// Simple dot notation parser
289
+
interface SimplePath {
290
+
segments: Array<{ type: 'property' | 'index'; value: string | number }>;
291
+
}
292
+
293
+
function compileSimplePath(expression: string): SimplePath {
294
+
const segments: Array<{ type: 'property' | 'index'; value: string | number }> = [];
295
+
296
+
let current = '';
297
+
let i = 0;
298
+
299
+
while (i < expression.length) {
300
+
const char = expression[i];
301
+
302
+
if (char === '.') {
303
+
if (current) {
304
+
segments.push({ type: 'property', value: current });
305
+
current = '';
306
+
}
307
+
i++;
308
+
} else if (char === '[') {
309
+
if (current) {
310
+
segments.push({ type: 'property', value: current });
311
+
current = '';
312
+
}
313
+
i++;
314
+
let index = '';
315
+
while (i < expression.length && expression[i] !== ']') {
316
+
index += expression[i];
317
+
i++;
318
+
}
319
+
segments.push({ type: 'index', value: parseInt(index) });
320
+
i++;
321
+
} else {
322
+
current += char;
323
+
i++;
324
+
}
325
+
}
326
+
327
+
if (current) {
328
+
segments.push({ type: 'property', value: current });
329
+
}
330
+
331
+
return { segments };
332
+
}
333
+
334
+
function querySimplePath(obj: any, compiled: SimplePath): any {
335
+
let current = obj;
336
+
337
+
for (const segment of compiled.segments) {
338
+
if (current == null) return null;
339
+
340
+
if (segment.type === 'property') {
341
+
current = current[segment.value];
342
+
} else {
343
+
if (Array.isArray(current)) {
344
+
current = current[segment.value as number];
345
+
} else {
346
+
return null;
347
+
}
348
+
}
349
+
}
350
+
351
+
return current;
352
+
}
353
+
354
+
function renderProgressBar(
355
+
elapsed: number,
356
+
current: number,
357
+
total: number,
358
+
progress: number,
359
+
matches: number,
360
+
mbPerSec: number,
361
+
etaSeconds: number
362
+
) {
363
+
const barWidth = 40;
364
+
const filledWidth = Math.floor(progress * barWidth);
365
+
const hours = Math.floor(elapsed / 3600);
366
+
const minutes = Math.floor((elapsed % 3600) / 60);
367
+
const seconds = Math.floor(elapsed % 60);
368
+
const timeStr = `[${hours.toString().padStart(2, '0')}:${minutes.toString().padStart(2, '0')}:${seconds.toString().padStart(2, '0')}]`;
369
+
370
+
let bar = '';
371
+
if (filledWidth === 0) {
372
+
bar = '>' + ' '.repeat(barWidth - 1);
373
+
} else if (filledWidth >= barWidth) {
374
+
bar = '>'.repeat(barWidth);
375
+
} else {
376
+
bar = '>' + '-'.repeat(filledWidth - 1) + ' '.repeat(barWidth - filledWidth);
377
+
}
378
+
379
+
const percent = (progress * 100).toFixed(1);
380
+
let etaStr = '';
381
+
if (etaSeconds > 0 && etaSeconds < 86400) {
382
+
if (etaSeconds < 60) {
383
+
etaStr = `${Math.ceil(etaSeconds)}s`;
384
+
} else if (etaSeconds < 3600) {
385
+
etaStr = `${Math.ceil(etaSeconds / 60)}m`;
386
+
} else {
387
+
const etaHours = Math.floor(etaSeconds / 3600);
388
+
const etaMin = Math.ceil((etaSeconds % 3600) / 60);
389
+
etaStr = `${etaHours}h ${etaMin}m`;
390
+
}
391
+
}
392
+
393
+
const matchesStr = matches >= 1000000 ? `${(matches / 1000000).toFixed(1)}M` : matches >= 1000 ? `${(matches / 1000).toFixed(0)}k` : matches.toString();
394
+
const line = etaStr
395
+
? `${timeStr} ${bar} ${current}/${total} (${percent}%) ${matchesStr} matches | ${mbPerSec.toFixed(1)} MB/s [ETA: ${etaStr}]`
396
+
: `${timeStr} ${bar} ${current}/${total} (${percent}%) ${matchesStr} matches | ${mbPerSec.toFixed(1)} MB/s`;
397
+
398
+
process.stderr.write(`\r${line}\x1b[K`);
399
+
}
+61
-6
src/cmds/verify.ts
+61
-6
src/cmds/verify.ts
···
8
8
9
9
OPTIONS:
10
10
--dir <path> Bundle directory (default: ./)
11
-
--bundle <num> Bundle number to verify (required)
11
+
--bundle <num> Bundle number to verify (required unless --chain is used)
12
+
--chain Verify entire chain instead of single bundle
13
+
--start <num> Start bundle for chain verification (default: 1)
14
+
--end <num> End bundle for chain verification (default: last bundle)
15
+
16
+
EXAMPLES:
17
+
plcbundle-bun verify --bundle 42
18
+
plcbundle-bun verify --chain
19
+
plcbundle-bun verify --chain --start 1 --end 100
12
20
`);
13
21
return;
14
22
}
···
18
26
options: {
19
27
dir: { type: 'string', default: './' },
20
28
bundle: { type: 'string' },
29
+
chain: { type: 'boolean', default: false },
30
+
start: { type: 'string' },
31
+
end: { type: 'string' },
21
32
},
22
33
strict: false,
23
34
});
24
35
36
+
const dir = (values.dir as string) || './';
37
+
const bundleInstance = new PLCBundle(dir);
38
+
39
+
// Chain verification
40
+
if (values.chain) {
41
+
const stats = await bundleInstance.getStats();
42
+
const start = values.start ? parseInt(values.start as string) : 1;
43
+
const end = values.end ? parseInt(values.end as string) : stats.lastBundle;
44
+
45
+
console.log(`Verifying chain: bundles ${start}-${end}\n`);
46
+
47
+
const startTime = Date.now();
48
+
49
+
const result = await bundleInstance.verifyChain({
50
+
start,
51
+
end,
52
+
onProgress: (current, total) => {
53
+
process.stdout.write(`Verified ${current}/${total} bundles\r`);
54
+
},
55
+
});
56
+
57
+
const elapsed = (Date.now() - startTime) / 1000;
58
+
59
+
console.log('\n');
60
+
61
+
if (result.valid) {
62
+
console.log(`✓ Chain verification passed`);
63
+
console.log(` Verified bundles: ${result.validBundles}`);
64
+
console.log(` Time elapsed: ${elapsed.toFixed(2)}s`);
65
+
} else {
66
+
console.error(`✗ Chain verification failed`);
67
+
console.error(` Valid bundles: ${result.validBundles}`);
68
+
console.error(` Invalid bundles: ${result.invalidBundles}`);
69
+
console.error('');
70
+
71
+
result.errors.forEach(({ bundleNum, errors }) => {
72
+
console.error(`Bundle ${bundleNum}:`);
73
+
errors.forEach(e => console.error(` - ${e}`));
74
+
});
75
+
76
+
process.exit(1);
77
+
}
78
+
79
+
return;
80
+
}
81
+
82
+
// Single bundle verification
25
83
if (!values.bundle || typeof values.bundle !== 'string') {
26
-
console.error('Error: --bundle is required');
84
+
console.error('Error: --bundle is required (or use --chain)');
27
85
process.exit(1);
28
86
}
29
87
30
-
const dir = (values.dir as string) || './';
31
88
const num = parseInt(values.bundle);
32
-
const bundle = new PLCBundle(dir);
33
-
34
-
const result = await bundle.verifyBundle(num);
89
+
const result = await bundleInstance.verifyBundle(num);
35
90
36
91
if (result.valid) {
37
92
console.log(`✓ Bundle ${num} is valid`);
+35
-1
src/index.ts
+35
-1
src/index.ts
···
1
+
/**
2
+
* plcbundle - Zero-dependency library for working with PLC bundle archives
3
+
*
4
+
* This module provides a Bun-native implementation for reading, processing,
5
+
* and cloning PLC (Placeholder DID) bundle archives. It leverages Bun's native
6
+
* features for optimal performance.
7
+
*
8
+
* @example
9
+
* ```ts
10
+
* import { PLCBundle } from "@yourscope/plcbundle-bun";
11
+
*
12
+
* // Create bundle instance
13
+
* const bundle = new PLCBundle("./bundles");
14
+
*
15
+
* // Clone from remote
16
+
* await bundle.clone("https://plcbundle.atscan.net", {
17
+
* bundles: "1-100",
18
+
* threads: 8
19
+
* });
20
+
*
21
+
* // Process operations
22
+
* await bundle.processBundles(1, 10, (op, pos, num) => {
23
+
* console.log(op.did);
24
+
* });
25
+
* ```
26
+
*
27
+
* @module
28
+
*/
29
+
1
30
export { PLCBundle } from './plcbundle';
2
31
export type {
3
32
BundleIndex,
4
33
BundleMetadata,
5
34
Operation,
6
-
ProcessCallback
35
+
ProcessCallback,
36
+
ProcessStats,
37
+
ProcessOptions,
38
+
CloneOptions,
39
+
CloneStats,
40
+
ChainVerificationResult,
7
41
} from './types';
+638
-43
src/plcbundle.ts
+638
-43
src/plcbundle.ts
···
6
6
ProcessOptions,
7
7
ProcessStats,
8
8
CloneOptions,
9
-
CloneStats
9
+
CloneStats,
10
+
ChainVerificationResult
10
11
} from './types';
11
12
12
13
/**
···
19
20
}
20
21
21
22
/**
22
-
* Bundle reader and processor for plcbundle format
23
+
* Main class for reading and processing PLC bundle archives.
24
+
*
25
+
* This class provides methods for:
26
+
* - Cloning bundles from remote repositories
27
+
* - Reading and verifying local bundles
28
+
* - Streaming operations from bundles
29
+
* - Processing bundles with custom callbacks
30
+
*
31
+
* All operations use Bun's native features for optimal performance.
32
+
*
33
+
* @example Basic usage
34
+
* ```ts
35
+
* const bundle = new PLCBundle('./my-bundles');
36
+
*
37
+
* // Get repository information
38
+
* const stats = await bundle.getStats();
39
+
* console.log(`Repository has ${stats.lastBundle} bundles`);
40
+
*
41
+
* // Stream operations from a bundle
42
+
* for await (const op of bundle.streamOperations(1)) {
43
+
* console.log(op.did);
44
+
* }
45
+
* ```
46
+
*
47
+
* @example Clone from remote
48
+
* ```ts
49
+
* const bundle = new PLCBundle('./bundles');
50
+
*
51
+
* await bundle.clone('https://plcbundle.atscan.net', {
52
+
* bundles: '1-100',
53
+
* threads: 8,
54
+
* verify: true,
55
+
* onProgress: (stats) => {
56
+
* console.log(`${stats.downloadedBundles}/${stats.totalBundles}`);
57
+
* }
58
+
* });
59
+
* ```
60
+
*
61
+
* @example Process with callback
62
+
* ```ts
63
+
* await bundle.processBundles(1, 10, (op, pos, num) => {
64
+
* if (op.did.startsWith('did:plc:')) {
65
+
* console.log(`Found DID: ${op.did}`);
66
+
* }
67
+
* }, {
68
+
* threads: 4,
69
+
* onProgress: (stats) => console.log(`${stats.totalOps} ops`)
70
+
* });
71
+
* ```
23
72
*/
24
73
export class PLCBundle {
25
74
private dir: string;
26
75
private indexPath: string;
27
76
private cachedIndex?: BundleIndex;
28
77
78
+
/**
79
+
* Create a new PLCBundle instance.
80
+
*
81
+
* @param dir - Directory containing bundle files (default: './')
82
+
* @param indexPath - Path to the index file (default: `${dir}/plc_bundles.json`)
83
+
*
84
+
* @example
85
+
* ```ts
86
+
* // Use default directory
87
+
* const bundle1 = new PLCBundle();
88
+
*
89
+
* // Specify custom directory
90
+
* const bundle2 = new PLCBundle('./my-bundles');
91
+
*
92
+
* // Custom directory and index path
93
+
* const bundle3 = new PLCBundle('./bundles', './custom-index.json');
94
+
* ```
95
+
*/
29
96
constructor(dir: string = './', indexPath?: string) {
30
97
this.dir = dir.endsWith('/') ? dir : `${dir}/`;
31
98
this.indexPath = indexPath || `${this.dir}plc_bundles.json`;
32
99
}
33
100
34
101
/**
35
-
* Load and cache the bundle index
102
+
* Load the bundle index from disk.
103
+
*
104
+
* The index is cached in memory after first load. Use `refresh` parameter
105
+
* to force reloading from disk.
106
+
*
107
+
* @param refresh - If true, reload from disk even if cached (default: false)
108
+
* @returns Promise resolving to the bundle index
109
+
* @throws Error if index file cannot be read or parsed
110
+
*
111
+
* @example
112
+
* ```ts
113
+
* // Load index (uses cache if available)
114
+
* const index = await bundle.loadIndex();
115
+
*
116
+
* // Force reload from disk
117
+
* const freshIndex = await bundle.loadIndex(true);
118
+
* ```
36
119
*/
37
120
async loadIndex(refresh = false): Promise<BundleIndex> {
38
121
if (!refresh && this.cachedIndex) {
···
45
128
}
46
129
47
130
/**
48
-
* Save the bundle index
131
+
* Save a bundle index to disk.
132
+
*
133
+
* Writes the index as formatted JSON and updates the in-memory cache.
134
+
* The index is written atomically using Bun's file API.
135
+
*
136
+
* @param index - The bundle index to save
137
+
* @returns Promise that resolves when save is complete
138
+
*
139
+
* @example
140
+
* ```ts
141
+
* const index = await bundle.loadIndex();
142
+
* index.last_bundle = 150;
143
+
* await bundle.saveIndex(index);
144
+
* ```
49
145
*/
50
146
async saveIndex(index: BundleIndex): Promise<void> {
51
147
await Bun.write(this.indexPath, JSON.stringify(index, null, 2));
···
53
149
}
54
150
55
151
/**
56
-
* Get metadata for a specific bundle
152
+
* Get metadata for a specific bundle.
153
+
*
154
+
* @param bundleNum - The bundle number to retrieve metadata for
155
+
* @returns Promise resolving to bundle metadata, or undefined if not found
156
+
*
157
+
* @example
158
+
* ```ts
159
+
* const metadata = await bundle.getMetadata(42);
160
+
* if (metadata) {
161
+
* console.log(`Bundle ${metadata.bundle_number} has ${metadata.operation_count} operations`);
162
+
* }
163
+
* ```
57
164
*/
58
165
async getMetadata(bundleNum: number): Promise<BundleMetadata | undefined> {
59
166
const index = await this.loadIndex();
···
61
168
}
62
169
63
170
/**
64
-
* Get bundle file path
171
+
* Get the file path for a specific bundle.
172
+
*
173
+
* Bundles are named with zero-padded 6-digit numbers (e.g., `000042.jsonl.zst`).
174
+
*
175
+
* @param bundleNum - The bundle number
176
+
* @returns Full path to the bundle file
177
+
*
178
+
* @example
179
+
* ```ts
180
+
* const path = bundle.getBundlePath(42);
181
+
* // Returns: "./bundles/000042.jsonl.zst"
182
+
* ```
65
183
*/
66
184
getBundlePath(bundleNum: number): string {
67
185
return `${this.dir}${bundleNum.toString().padStart(6, '0')}.jsonl.zst`;
68
186
}
69
187
70
188
/**
71
-
* Read and decompress a bundle
189
+
* Read and decompress a bundle file.
190
+
*
191
+
* Uses Bun's native zstd decompression for optimal performance.
192
+
*
193
+
* @param bundleNum - The bundle number to read
194
+
* @returns Promise resolving to the decompressed JSONL content as a string
195
+
* @throws Error if bundle file cannot be read or decompressed
196
+
*
197
+
* @example
198
+
* ```ts
199
+
* const jsonl = await bundle.readBundle(1);
200
+
* console.log(`Bundle size: ${jsonl.length} bytes`);
201
+
* ```
72
202
*/
73
203
async readBundle(bundleNum: number): Promise<string> {
74
204
const path = this.getBundlePath(bundleNum);
···
78
208
}
79
209
80
210
/**
81
-
* Parse operations from bundle content
211
+
* Parse operations from JSONL content.
212
+
*
213
+
* @param content - JSONL string with one operation per line
214
+
* @returns Array of parsed operations
215
+
*
216
+
* @example
217
+
* ```ts
218
+
* const jsonl = await bundle.readBundle(1);
219
+
* const operations = bundle.parseOperations(jsonl);
220
+
* console.log(`Parsed ${operations.length} operations`);
221
+
* ```
82
222
*/
83
223
parseOperations(content: string): Operation[] {
84
224
return content
···
88
228
}
89
229
90
230
/**
91
-
* Stream operations from a bundle (memory efficient)
231
+
* Stream operations from a bundle (memory efficient).
232
+
*
233
+
* This async generator yields operations one at a time, which is more
234
+
* memory efficient than loading all operations at once.
235
+
*
236
+
* @param bundleNum - The bundle number to stream from
237
+
* @yields Operations from the bundle
238
+
*
239
+
* @example
240
+
* ```ts
241
+
* for await (const op of bundle.streamOperations(1)) {
242
+
* console.log(op.did);
243
+
* if (someCondition) break; // Can stop early
244
+
* }
245
+
* ```
92
246
*/
93
-
async *streamOperations(bundleNum: number): AsyncGenerator<Operation> {
247
+
async *streamOperations(bundleNum: number): AsyncGenerator<{ op: Operation; line: string }> {
94
248
const content = await this.readBundle(bundleNum);
95
249
const lines = content.split('\n');
96
250
97
251
for (const line of lines) {
98
252
if (line.trim()) {
99
-
yield JSON.parse(line);
253
+
yield { op: JSON.parse(line), line };
100
254
}
101
255
}
102
256
}
103
257
104
258
/**
105
-
* Process multiple bundles with a callback (supports multi-threading)
259
+
* Process multiple bundles with a callback or module.
260
+
*
261
+
* Two modes:
262
+
* 1. **Direct callback** (single-threaded): Pass callback function
263
+
* 2. **Module path** (multi-threaded): Pass module path in options
264
+
*
265
+
* @param start - First bundle number
266
+
* @param end - Last bundle number
267
+
* @param callbackOrOptions - Callback function OR options object with module path
268
+
* @param options - Processing options (only if callback is provided)
269
+
* @returns Promise resolving to processing statistics
270
+
*
271
+
* @example Single-threaded with callback
272
+
* ```ts
273
+
* await bundle.processBundles(1, 10, (op, pos, num, line) => {
274
+
* console.log(op.did);
275
+
* });
276
+
* ```
277
+
*
278
+
* @example Multi-threaded with module
279
+
* ```ts
280
+
* await bundle.processBundles(1, 100, {
281
+
* module: './detect.ts',
282
+
* threads: 4,
283
+
* onProgress: (stats) => console.log(stats.totalOps)
284
+
* });
285
+
* ```
106
286
*/
107
287
async processBundles(
108
288
start: number,
109
289
end: number,
110
-
callback: ProcessCallback,
111
-
options: ProcessOptions = {}
112
-
): Promise<ProcessStats> {
113
-
const { threads = 1, onProgress } = options;
290
+
callbackOrOptions: ProcessCallback | ProcessOptions,
291
+
options?: ProcessOptions
292
+
): Promise<ProcessStats & { matches?: any[] }> {
293
+
let callback: ProcessCallback | undefined;
294
+
let processOptions: ProcessOptions;
114
295
115
-
if (threads > 1) {
116
-
return await this.processBundlesMultiThreaded(start, end, callback, threads, onProgress);
296
+
if (typeof callbackOrOptions === 'function') {
297
+
callback = callbackOrOptions;
298
+
processOptions = options || {};
117
299
} else {
118
-
return await this.processBundlesSingleThreaded(start, end, callback, onProgress);
300
+
processOptions = callbackOrOptions;
301
+
}
302
+
303
+
const { threads = 1, module, silent = false, flush = false, onProgress, onMatch } = processOptions;
304
+
305
+
// Validation: multi-threading requires module
306
+
if (threads > 1 && !module) {
307
+
throw new Error('Multi-threading requires module path. Use: processBundles(start, end, { module: "./detect.ts", threads: 4 })');
308
+
}
309
+
310
+
// Determine mode based on what function is exported
311
+
let mode: 'detect' | 'process' = 'detect';
312
+
let mod: any;
313
+
if (module) {
314
+
try {
315
+
mod = await import(module);
316
+
// If module has 'process' function, use process mode
317
+
if (mod.process) {
318
+
mode = 'process';
319
+
} else if (mod.detect) {
320
+
mode = 'detect';
321
+
}
322
+
} catch (e) {
323
+
// Default to detect
324
+
}
325
+
}
326
+
327
+
// Use workers for multi-threading with module
328
+
if (threads > 1 && module) {
329
+
return await this.processBundlesWorkers(
330
+
start,
331
+
end,
332
+
module,
333
+
threads,
334
+
silent,
335
+
flush,
336
+
mode, // Pass mode
337
+
onProgress,
338
+
onMatch
339
+
);
340
+
}
341
+
342
+
// Load module if provided but single-threaded
343
+
if (mod && !callback) {
344
+
const userFn = mode === 'detect' ? (mod.detect || mod.default) : (mod.process || mod.default);
345
+
346
+
callback = (op, position, bundleNum, line) => {
347
+
if (mode === 'detect') {
348
+
userFn({ op });
349
+
} else {
350
+
userFn({ op, position, bundle: bundleNum, line });
351
+
}
352
+
};
353
+
}
354
+
355
+
if (!callback) {
356
+
throw new Error('Either callback function or module path must be provided');
357
+
}
358
+
359
+
// Single-threaded fast path
360
+
return await this.processBundlesFast(start, end, callback, onProgress);
361
+
}
362
+
363
+
/**
364
+
* Multi-threaded processing using workers (optimized)
365
+
*/
366
+
private async processBundlesWorkers(
367
+
start: number,
368
+
end: number,
369
+
modulePath: string,
370
+
threads: number,
371
+
silent: boolean,
372
+
flush: boolean,
373
+
mode: 'detect' | 'process', // Add mode parameter
374
+
onProgress?: (stats: ProcessStats) => void,
375
+
onMatch?: (match: any) => void
376
+
): Promise<ProcessStats & { matches?: any[] }> {
377
+
const totalBundles = end - start + 1;
378
+
const bundlesPerThread = Math.ceil(totalBundles / threads);
379
+
380
+
const workerPath = new URL('./worker.ts', import.meta.url).pathname;
381
+
const workers: Worker[] = [];
382
+
const workerStats: Array<{ totalOps: number; totalBytes: number }> = [];
383
+
384
+
let aggregatedOps = 0;
385
+
let aggregatedBytes = 0;
386
+
const allMatches: any[] = [];
387
+
388
+
const workerPromises = [];
389
+
390
+
for (let i = 0; i < threads; i++) {
391
+
const threadStart = start + i * bundlesPerThread;
392
+
const threadEnd = Math.min(threadStart + bundlesPerThread - 1, end);
393
+
394
+
if (threadStart > end) break;
395
+
396
+
const worker = new Worker(workerPath);
397
+
workers.push(worker);
398
+
399
+
workerStats[i] = { totalOps: 0, totalBytes: 0 };
400
+
401
+
const promise = new Promise<any>((resolve) => {
402
+
worker.onmessage = (event) => {
403
+
const msg = event.data;
404
+
405
+
if (msg.type === 'progress') {
406
+
const oldStats = workerStats[i];
407
+
aggregatedOps += msg.totalOps - oldStats.totalOps;
408
+
aggregatedBytes += msg.totalBytes - oldStats.totalBytes;
409
+
workerStats[i] = { totalOps: msg.totalOps, totalBytes: msg.totalBytes };
410
+
411
+
if (onProgress) {
412
+
onProgress({
413
+
totalOps: aggregatedOps,
414
+
matchCount: 0,
415
+
totalBytes: aggregatedBytes,
416
+
matchedBytes: 0,
417
+
});
418
+
}
419
+
} else if (msg.type === 'match') {
420
+
// Handle flushed match - call callback immediately
421
+
if (onMatch) {
422
+
onMatch(msg);
423
+
}
424
+
} else if (msg.type === 'result') {
425
+
resolve(msg);
426
+
}
427
+
};
428
+
});
429
+
430
+
workerPromises.push(promise);
431
+
432
+
worker.postMessage({
433
+
dir: this.dir,
434
+
start: threadStart,
435
+
end: threadEnd,
436
+
modulePath,
437
+
silent,
438
+
flush,
439
+
mode, // Pass mode to worker
440
+
});
441
+
}
442
+
443
+
// Wait for all workers
444
+
const results = await Promise.all(workerPromises);
445
+
446
+
// Cleanup
447
+
workers.forEach(w => w.terminate());
448
+
449
+
if (modulePath) {
450
+
const mod = await import(modulePath);
451
+
if (mod.finalize) {
452
+
await mod.finalize(results, { aggregate: this.aggregate });
453
+
}
119
454
}
455
+
456
+
// Aggregate results
457
+
let totalOps = 0;
458
+
let totalBytes = 0;
459
+
460
+
for (const result of results) {
461
+
totalOps += result.totalOps;
462
+
totalBytes += result.totalBytes;
463
+
if (!flush) {
464
+
allMatches.push(...result.matches);
465
+
}
466
+
}
467
+
468
+
// Sort matches if not flushed
469
+
if (!flush && mode === 'detect') {
470
+
allMatches.sort((a, b) => {
471
+
if (a.bundle !== b.bundle) return a.bundle - b.bundle;
472
+
return a.position - b.position;
473
+
});
474
+
}
475
+
476
+
return {
477
+
totalOps,
478
+
matchCount: 0,
479
+
totalBytes,
480
+
matchedBytes: 0,
481
+
matches: flush || mode === 'process' ? undefined : allMatches,
482
+
};
483
+
}
484
+
485
+
private aggregate(objects: Array<{ [key: string]: number }>): { [key: string]: number } {
486
+
const aggregatedDict: { [key: string]: number } = {};
487
+
488
+
for (const currentObj of objects) {
489
+
for (const key in currentObj) {
490
+
if (Object.prototype.hasOwnProperty.call(currentObj, key)) {
491
+
aggregatedDict[key] = (aggregatedDict[key] || 0) + currentObj[key];
492
+
}
493
+
}
494
+
}
495
+
496
+
return aggregatedDict;
120
497
}
121
498
499
+
122
500
/**
123
-
* Single-threaded processing
501
+
* Fast single-threaded processing (optimized)
124
502
*/
125
-
private async processBundlesSingleThreaded(
503
+
private async processBundlesFast(
126
504
start: number,
127
505
end: number,
128
506
callback: ProcessCallback,
···
136
514
};
137
515
138
516
for (let bundleNum = start; bundleNum <= end; bundleNum++) {
139
-
let position = 0;
517
+
const content = await this.readBundle(bundleNum);
518
+
const lines = content.split('\n');
140
519
141
-
for await (const op of this.streamOperations(bundleNum)) {
520
+
for (let position = 0; position < lines.length; position++) {
521
+
const line = lines[position];
522
+
if (!line.trim()) continue;
523
+
142
524
stats.totalOps++;
143
-
stats.totalBytes += JSON.stringify(op).length;
525
+
stats.totalBytes += line.length;
144
526
145
-
await callback(op, position++, bundleNum);
527
+
const op = JSON.parse(line);
528
+
await callback(op, position, bundleNum, line);
146
529
147
530
if (onProgress && stats.totalOps % 10000 === 0) {
148
531
onProgress({ ...stats });
···
154
537
}
155
538
156
539
/**
157
-
* Multi-threaded processing
158
-
*/
159
-
private async processBundlesMultiThreaded(
160
-
start: number,
161
-
end: number,
162
-
callback: ProcessCallback,
163
-
threads: number,
164
-
onProgress?: (stats: ProcessStats) => void
165
-
): Promise<ProcessStats> {
166
-
// Simplified implementation - full multi-threading requires callback serialization
167
-
return await this.processBundlesSingleThreaded(start, end, callback, onProgress);
168
-
}
169
-
170
-
/**
171
-
* Clone bundles from a remote repository
540
+
* Clone bundles from a remote repository.
541
+
*
542
+
* Downloads bundles via HTTP, verifies hashes, and saves progress periodically.
543
+
* Supports resuming interrupted downloads - bundles that already exist and
544
+
* pass verification will be skipped.
545
+
*
546
+
* Progress is automatically saved every 5 seconds and on completion.
547
+
*
548
+
* @param remoteUrl - Base URL of the remote bundle repository
549
+
* @param options - Clone options (bundles, threads, verification, callbacks)
550
+
* @returns Promise resolving to clone statistics
551
+
* @throws Error if remote is unreachable or bundle range is invalid
552
+
*
553
+
* @example Clone all bundles
554
+
* ```ts
555
+
* await bundle.clone('https://plcbundle.atscan.net', {
556
+
* threads: 8,
557
+
* verify: true
558
+
* });
559
+
* ```
560
+
*
561
+
* @example Clone specific range with progress
562
+
* ```ts
563
+
* await bundle.clone('https://plcbundle.atscan.net', {
564
+
* bundles: '1-100',
565
+
* threads: 4,
566
+
* onProgress: (stats) => {
567
+
* const pct = (stats.downloadedBytes / stats.totalBytes * 100).toFixed(1);
568
+
* console.log(`${pct}% complete`);
569
+
* }
570
+
* });
571
+
* ```
572
+
*
573
+
* @example Resume interrupted download
574
+
* ```ts
575
+
* // First run - interrupted
576
+
* await bundle.clone('https://plcbundle.atscan.net', { bundles: '1-1000' });
577
+
*
578
+
* // Second run - automatically resumes
579
+
* await bundle.clone('https://plcbundle.atscan.net', { bundles: '1-1000' });
580
+
* ```
172
581
*/
173
582
async clone(
174
583
remoteUrl: string,
···
398
807
}
399
808
400
809
/**
401
-
* Verify bundle integrity
810
+
* Verify the integrity of a bundle.
811
+
*
812
+
* Checks both the compressed file hash and the content hash against
813
+
* the values stored in the bundle index.
814
+
*
815
+
* Uses Bun's native SHA-256 hasher for optimal performance.
816
+
*
817
+
* @param bundleNum - The bundle number to verify
818
+
* @returns Promise resolving to verification result
819
+
*
820
+
* @example
821
+
* ```ts
822
+
* const result = await bundle.verifyBundle(42);
823
+
* if (result.valid) {
824
+
* console.log('Bundle is valid');
825
+
* } else {
826
+
* console.error('Verification failed:');
827
+
* result.errors.forEach(err => console.error(` - ${err}`));
828
+
* }
829
+
* ```
402
830
*/
403
831
async verifyBundle(bundleNum: number): Promise<{ valid: boolean; errors: string[] }> {
404
832
const metadata = await this.getMetadata(bundleNum);
···
429
857
}
430
858
431
859
/**
432
-
* Calculate chain hash
860
+
* Calculate a chain hash linking bundles together.
861
+
*
862
+
* The chain hash ensures bundles form an unbroken chain and haven't
863
+
* been tampered with. Genesis bundles use a special hash format.
864
+
*
865
+
* @param parentHash - Hash of the parent bundle (empty for genesis)
866
+
* @param contentHash - Hash of this bundle's content
867
+
* @param isGenesis - Whether this is the first bundle in the chain
868
+
* @returns The calculated chain hash as a hex string
869
+
*
870
+
* @example
871
+
* ```ts
872
+
* // Genesis bundle
873
+
* const genesisHash = bundle.calculateChainHash('', contentHash, true);
874
+
*
875
+
* // Subsequent bundle
876
+
* const chainHash = bundle.calculateChainHash(parentHash, contentHash, false);
877
+
* ```
433
878
*/
434
879
calculateChainHash(parentHash: string, contentHash: string, isGenesis: boolean): string {
435
880
const input = isGenesis
···
440
885
}
441
886
442
887
/**
443
-
* Get bundle statistics
888
+
* Get repository statistics.
889
+
*
890
+
* Provides a quick overview of the bundle repository without loading
891
+
* all bundle metadata.
892
+
*
893
+
* @returns Promise resolving to repository statistics
894
+
*
895
+
* @example
896
+
* ```ts
897
+
* const stats = await bundle.getStats();
898
+
* console.log(`Version: ${stats.version}`);
899
+
* console.log(`Bundles: ${stats.totalBundles}`);
900
+
* console.log(`Size: ${(stats.totalSize / 1e9).toFixed(2)} GB`);
901
+
* console.log(`Updated: ${stats.updatedAt}`);
902
+
* ```
444
903
*/
445
904
async getStats(): Promise<{
446
905
version: string;
···
458
917
updatedAt: index.updated_at,
459
918
};
460
919
}
920
+
921
+
/**
922
+
* Verify the integrity of the entire bundle chain.
923
+
*
924
+
* This method validates:
925
+
* - Each bundle's compressed and content hashes
926
+
* - The chain hash linking each bundle to its parent
927
+
* - The continuity of the chain (no missing bundles)
928
+
* - The genesis bundle has correct initial hash
929
+
*
930
+
* @param options - Verification options
931
+
* @param options.start - First bundle to verify (default: 1)
932
+
* @param options.end - Last bundle to verify (default: last available bundle)
933
+
* @param options.onProgress - Callback for progress updates
934
+
* @returns Promise resolving to chain verification result
935
+
*
936
+
* @example Verify entire chain
937
+
* ```ts
938
+
* const result = await bundle.verifyChain();
939
+
*
940
+
* if (result.valid) {
941
+
* console.log(`✓ All ${result.totalBundles} bundles verified`);
942
+
* } else {
943
+
* console.error(`✗ Chain invalid: ${result.invalidBundles} errors`);
944
+
* result.errors.forEach(({ bundleNum, errors }) => {
945
+
* console.error(`Bundle ${bundleNum}:`);
946
+
* errors.forEach(e => console.error(` - ${e}`));
947
+
* });
948
+
* }
949
+
* ```
950
+
*
951
+
* @example Verify range with progress
952
+
* ```ts
953
+
* const result = await bundle.verifyChain({
954
+
* start: 1,
955
+
* end: 100,
956
+
* onProgress: (current, total) => {
957
+
* console.log(`Verified ${current}/${total} bundles`);
958
+
* }
959
+
* });
960
+
* ```
961
+
*/
962
+
async verifyChain(options: {
963
+
start?: number;
964
+
end?: number;
965
+
onProgress?: (current: number, total: number) => void;
966
+
} = {}): Promise<ChainVerificationResult> {
967
+
const index = await this.loadIndex();
968
+
969
+
const start = options.start || 1;
970
+
const end = options.end || index.last_bundle;
971
+
972
+
// Validate range
973
+
if (start < 1 || end > index.last_bundle || start > end) {
974
+
throw new Error(`Invalid bundle range ${start}-${end} (available: 1-${index.last_bundle})`);
975
+
}
976
+
977
+
const result: ChainVerificationResult = {
978
+
valid: true,
979
+
totalBundles: end - start + 1,
980
+
validBundles: 0,
981
+
invalidBundles: 0,
982
+
errors: [],
983
+
};
984
+
985
+
let previousHash = '';
986
+
987
+
for (let bundleNum = start; bundleNum <= end; bundleNum++) {
988
+
const metadata = index.bundles.find(b => b.bundle_number === bundleNum);
989
+
990
+
if (!metadata) {
991
+
result.valid = false;
992
+
result.invalidBundles++;
993
+
result.errors.push({
994
+
bundleNum,
995
+
errors: ['Bundle missing from index'],
996
+
});
997
+
continue;
998
+
}
999
+
1000
+
const bundleErrors: string[] = [];
1001
+
1002
+
// Verify bundle file hashes
1003
+
const verification = await this.verifyBundle(bundleNum);
1004
+
if (!verification.valid) {
1005
+
bundleErrors.push(...verification.errors);
1006
+
}
1007
+
1008
+
// Verify chain linkage
1009
+
if (bundleNum === 1) {
1010
+
// Genesis bundle should have empty parent
1011
+
if (metadata.parent !== '') {
1012
+
bundleErrors.push(`Genesis bundle should have empty parent, got: ${metadata.parent}`);
1013
+
}
1014
+
1015
+
// Verify genesis hash format
1016
+
const expectedHash = this.calculateChainHash('', metadata.content_hash, true);
1017
+
if (metadata.hash !== expectedHash) {
1018
+
bundleErrors.push(`Invalid genesis chain hash: ${metadata.hash} != ${expectedHash}`);
1019
+
}
1020
+
} else {
1021
+
// Verify parent reference
1022
+
if (metadata.parent !== previousHash) {
1023
+
bundleErrors.push(`Parent hash mismatch: expected ${previousHash}, got ${metadata.parent}`);
1024
+
}
1025
+
1026
+
// Verify chain hash
1027
+
const expectedHash = this.calculateChainHash(metadata.parent, metadata.content_hash, false);
1028
+
if (metadata.hash !== expectedHash) {
1029
+
bundleErrors.push(`Invalid chain hash: ${metadata.hash} != ${expectedHash}`);
1030
+
}
1031
+
}
1032
+
1033
+
// Record results
1034
+
if (bundleErrors.length > 0) {
1035
+
result.valid = false;
1036
+
result.invalidBundles++;
1037
+
result.errors.push({
1038
+
bundleNum,
1039
+
errors: bundleErrors,
1040
+
});
1041
+
} else {
1042
+
result.validBundles++;
1043
+
}
1044
+
1045
+
previousHash = metadata.hash;
1046
+
1047
+
// Report progress
1048
+
if (options.onProgress) {
1049
+
options.onProgress(bundleNum - start + 1, result.totalBundles);
1050
+
}
1051
+
}
1052
+
1053
+
return result;
1054
+
}
1055
+
461
1056
}
+262
-1
src/types.ts
+262
-1
src/types.ts
···
1
+
/**
2
+
* Type definitions for plcbundle library
3
+
*
4
+
* This module contains all TypeScript type definitions used throughout
5
+
* the plcbundle library.
6
+
*
7
+
* @module
8
+
*/
9
+
10
+
/**
11
+
* Metadata for a single bundle in the repository.
12
+
*
13
+
* Contains information about the bundle's contents, hashes for verification,
14
+
* and temporal boundaries.
15
+
*/
1
16
export interface BundleMetadata {
17
+
/** Sequential number identifying this bundle (e.g., 1, 2, 3...) */
2
18
bundle_number: number;
19
+
20
+
/** ISO 8601 timestamp of the first operation in this bundle */
3
21
start_time: string;
22
+
23
+
/** ISO 8601 timestamp of the last operation in this bundle */
4
24
end_time: string;
25
+
26
+
/** Total number of PLC operations contained in this bundle */
5
27
operation_count: number;
28
+
29
+
/** Number of unique DIDs referenced in this bundle */
6
30
did_count: number;
31
+
32
+
/** Chain hash linking this bundle to its predecessor */
7
33
hash: string;
34
+
35
+
/** SHA-256 hash of the uncompressed JSONL content */
8
36
content_hash: string;
37
+
38
+
/** Chain hash of the previous bundle (empty string for genesis bundle) */
9
39
parent: string;
40
+
41
+
/** SHA-256 hash of the compressed .jsonl.zst file */
10
42
compressed_hash: string;
43
+
44
+
/** Size of the compressed bundle file in bytes */
11
45
compressed_size: number;
46
+
47
+
/** Size of the uncompressed JSONL content in bytes */
12
48
uncompressed_size: number;
49
+
50
+
/** Cursor for fetching subsequent operations (end_time of previous bundle) */
13
51
cursor: string;
52
+
53
+
/** ISO 8601 timestamp when this bundle was created */
14
54
created_at: string;
15
55
}
16
56
57
+
/**
58
+
* Index file containing metadata for all bundles in a repository.
59
+
*
60
+
* This is the main entry point for discovering available bundles.
61
+
* Located at `plc_bundles.json` in the repository root.
62
+
*/
17
63
export interface BundleIndex {
64
+
/** Version of the index format (currently "1.0") */
18
65
version: string;
66
+
67
+
/** Bundle number of the most recent bundle */
19
68
last_bundle: number;
69
+
70
+
/** ISO 8601 timestamp when the index was last updated */
20
71
updated_at: string;
72
+
73
+
/** Total size of all compressed bundles in bytes */
21
74
total_size_bytes: number;
75
+
76
+
/** Array of metadata for each bundle, sorted by bundle_number */
22
77
bundles: BundleMetadata[];
23
78
}
24
79
80
+
/**
81
+
* A single PLC operation as stored in bundles.
82
+
*
83
+
* Operations represent changes to DID documents in the PLC directory.
84
+
*/
25
85
export interface Operation {
86
+
/** Decentralized Identifier (DID) this operation applies to */
26
87
did: string;
88
+
89
+
/** Content Identifier (CID) of this operation */
27
90
cid: string;
91
+
92
+
/** The actual operation data containing DID document changes */
28
93
operation: any;
94
+
95
+
/** ISO 8601 timestamp when this operation was created */
29
96
createdAt: string;
97
+
98
+
/** Additional fields that may be present in operations */
30
99
[key: string]: any;
31
100
}
32
101
102
+
/**
103
+
* Callback function called for each operation during processing.
104
+
*
105
+
* @param op - The operation being processed
106
+
* @param position - Zero-based position of the operation within its bundle
107
+
* @param bundleNum - The bundle number being processed
108
+
* @param line - The raw JSONL line (for getting size without re-serializing)
109
+
*/
33
110
export type ProcessCallback = (
34
111
op: Operation,
35
112
position: number,
36
-
bundleNum: number
113
+
bundleNum: number,
114
+
line: string
37
115
) => void | Promise<void>;
38
116
117
+
/**
118
+
* Statistics collected during bundle processing.
119
+
*
120
+
* Tracks the number of operations and bytes processed.
121
+
*/
39
122
export interface ProcessStats {
123
+
/** Total number of operations processed */
40
124
totalOps: number;
125
+
126
+
/** Number of operations that matched criteria (if applicable) */
41
127
matchCount: number;
128
+
129
+
/** Total bytes of operation data processed */
42
130
totalBytes: number;
131
+
132
+
/** Bytes of data for matched operations (if applicable) */
43
133
matchedBytes: number;
44
134
}
45
135
136
+
/**
137
+
* Unified options for processing bundles.
138
+
*
139
+
* Supports both callback functions (single-threaded) and module paths (multi-threaded).
140
+
*/
46
141
export interface ProcessOptions {
142
+
/**
143
+
* Number of worker threads (default: 1).
144
+
* If > 1, requires `module` instead of passing callback directly.
145
+
*/
47
146
threads?: number;
147
+
148
+
/**
149
+
* Path to module exporting a `detect` or default function.
150
+
* Required when threads > 1. The module should export:
151
+
* ```ts
152
+
* export function detect({ op }) { return [...labels]; }
153
+
* ```
154
+
*/
155
+
module?: string;
156
+
157
+
/**
158
+
* Suppress all console output from the detect script (default: false).
159
+
*
160
+
* When enabled, console.log, console.error, etc. from the detect function
161
+
* are silenced. Progress reporting and final statistics are still shown.
162
+
*
163
+
* @example
164
+
* ```ts
165
+
* await bundle.processBundles(1, 10, {
166
+
* module: './noisy-detect.ts',
167
+
* silent: true // Suppress debug output
168
+
* });
169
+
* ```
170
+
*/
171
+
silent?: boolean;
172
+
173
+
/**
174
+
* Output matches immediately without buffering or sorting (default: false).
175
+
*
176
+
* When enabled, matches are output as soon as they're found rather than
177
+
* being collected, sorted, and output at the end. Useful for:
178
+
* - Real-time streaming output
179
+
* - Reducing memory usage with large result sets
180
+
* - Early access to results
181
+
*
182
+
* Note: With flush enabled, matches may be out of chronological order
183
+
* when using multiple threads.
184
+
*
185
+
* @example
186
+
* ```ts
187
+
* await bundle.processBundles(1, 100, {
188
+
* module: './detect.ts',
189
+
* threads: 4,
190
+
* flush: true,
191
+
* onMatch: (match) => {
192
+
* console.log(`Found: ${match.bundle}/${match.position}`);
193
+
* }
194
+
* });
195
+
* ```
196
+
*/
197
+
flush?: boolean;
198
+
199
+
/**
200
+
* Progress callback invoked periodically during processing.
201
+
*
202
+
* Called approximately every 10,000 operations to report current
203
+
* processing statistics.
204
+
*
205
+
* @param stats - Current processing statistics
206
+
*
207
+
* @example
208
+
* ```ts
209
+
* await bundle.processBundles(1, 100, callback, {
210
+
* onProgress: (stats) => {
211
+
* console.log(`${stats.totalOps} operations processed`);
212
+
* }
213
+
* });
214
+
* ```
215
+
*/
48
216
onProgress?: (stats: ProcessStats) => void;
217
+
218
+
/**
219
+
* Match callback invoked for each match when flush mode is enabled.
220
+
*
221
+
* Only called when `flush: true`. Receives match objects as they're
222
+
* found, without waiting for sorting. Ignored in non-flush mode.
223
+
*
224
+
* @param match - The match object with bundle, position, cid, size, and labels
225
+
*
226
+
* @example
227
+
* ```ts
228
+
* await bundle.processBundles(1, 100, {
229
+
* module: './detect.ts',
230
+
* flush: true,
231
+
* onMatch: (match) => {
232
+
* // Output immediately
233
+
* console.log(`${match.bundle},${match.position},${match.labels}`);
234
+
* }
235
+
* });
236
+
* ```
237
+
*/
238
+
onMatch?: (match: any) => void;
49
239
}
50
240
241
+
/**
242
+
* Options for cloning bundles from a remote repository.
243
+
*
244
+
* Controls download behavior, verification, and progress reporting.
245
+
*/
51
246
export interface CloneOptions {
247
+
/** Number of parallel download threads (default: 4) */
52
248
threads?: number;
249
+
250
+
/**
251
+
* Bundle selection specification.
252
+
*
253
+
* Can be:
254
+
* - A single bundle number: `"42"`
255
+
* - A range: `"1-100"`
256
+
* - Undefined to clone all available bundles
257
+
*/
53
258
bundles?: string;
259
+
260
+
/** Whether to verify SHA-256 hashes of downloaded bundles (default: true) */
54
261
verify?: boolean;
262
+
263
+
/**
264
+
* Function to check if cloning should stop (for graceful shutdown).
265
+
*
266
+
* @returns `true` if cloning should stop, `false` to continue
267
+
*/
55
268
shouldStop?: () => boolean;
269
+
270
+
/**
271
+
* Callback invoked to report download progress.
272
+
*
273
+
* @param stats - Current download statistics
274
+
*/
56
275
onProgress?: (stats: CloneStats) => void;
57
276
}
58
277
278
+
/**
279
+
* Statistics collected during bundle cloning.
280
+
*
281
+
* Tracks download progress, including successes, skips, and failures.
282
+
*/
59
283
export interface CloneStats {
284
+
/** Total number of bundles to download */
60
285
totalBundles: number;
286
+
287
+
/** Number of bundles successfully downloaded in this session */
61
288
downloadedBundles: number;
289
+
290
+
/** Number of bundles skipped (already existed and verified) */
62
291
skippedBundles: number;
292
+
293
+
/** Number of bundles that failed to download */
63
294
failedBundles: number;
295
+
296
+
/** Total bytes to download across all bundles */
64
297
totalBytes: number;
298
+
299
+
/** Bytes downloaded so far (including skipped bundles) */
65
300
downloadedBytes: number;
66
301
}
302
+
303
+
/**
304
+
* Result of chain verification.
305
+
*
306
+
* Contains overall validity status and detailed information about
307
+
* any issues found in the bundle chain.
308
+
*/
309
+
export interface ChainVerificationResult {
310
+
/** Whether the entire chain is valid */
311
+
valid: boolean;
312
+
313
+
/** Total number of bundles verified */
314
+
totalBundles: number;
315
+
316
+
/** Number of bundles that passed verification */
317
+
validBundles: number;
318
+
319
+
/** Number of bundles that failed verification */
320
+
invalidBundles: number;
321
+
322
+
/** Detailed errors for each invalid bundle */
323
+
errors: Array<{
324
+
bundleNum: number;
325
+
errors: string[];
326
+
}>;
327
+
}
+179
-60
src/worker.ts
+179
-60
src/worker.ts
···
1
1
/// <reference lib="webworker" />
2
2
3
-
import { PLCBundle } from './plcbundle';
4
-
import type { Operation } from './types';
3
+
import { search as jmespathSearch } from '@jmespath-community/jmespath';
5
4
6
5
export interface WorkerTask {
7
6
dir: string;
8
7
start: number;
9
8
end: number;
10
-
detectPath: string;
9
+
modulePath?: string;
10
+
expression?: string;
11
+
useSimple?: boolean;
12
+
silent?: boolean;
13
+
flush?: boolean;
14
+
mode?: 'detect' | 'process' | 'query';
11
15
}
12
16
13
17
export interface WorkerProgress {
14
18
type: 'progress';
19
+
currentBundle: number;
15
20
totalOps: number;
21
+
totalBytes: number;
16
22
matchCount: number;
17
-
totalBytes: number;
18
-
matchedBytes: number;
23
+
}
24
+
25
+
export interface WorkerMatchBatch {
26
+
type: 'match-batch';
27
+
matches: Array<{ result: any }>;
19
28
}
20
29
21
30
export interface WorkerResult {
22
31
type: 'result';
23
32
totalOps: number;
24
-
matchCount: number;
25
33
totalBytes: number;
26
-
matchedBytes: number;
27
-
matches: Array<{
28
-
bundle: number;
29
-
position: number;
30
-
cid: string;
31
-
size: number;
32
-
labels: string[];
33
-
}>;
34
+
matchCount: number;
35
+
data?: any;
34
36
}
35
37
36
-
// Worker message handler
37
38
self.onmessage = async (event: MessageEvent<WorkerTask>) => {
38
-
const { dir, start, end, detectPath } = event.data;
39
+
const {
40
+
dir,
41
+
start,
42
+
end,
43
+
modulePath,
44
+
expression,
45
+
useSimple,
46
+
silent,
47
+
flush,
48
+
mode = 'detect'
49
+
} = event.data;
39
50
40
-
const bundle = new PLCBundle(dir);
51
+
if (silent) {
52
+
globalThis.console = {
53
+
log: () => {},
54
+
error: () => {},
55
+
warn: () => {},
56
+
info: () => {},
57
+
debug: () => {},
58
+
trace: () => {},
59
+
} as any;
60
+
}
41
61
42
-
// Load detect function
43
-
const mod = await import(detectPath);
44
-
const detect = mod.detect || mod.default;
62
+
let userFn: any;
63
+
let queryFn: ((op: any) => any) | null = null;
64
+
65
+
if (mode === 'query') {
66
+
// Query mode
67
+
if (useSimple) {
68
+
const compiled = compileSimplePath(expression!);
69
+
70
+
// Ultra-fast path for single property
71
+
if (compiled.segments.length === 1 && compiled.segments[0].type === 'property') {
72
+
const prop = compiled.segments[0].value as string;
73
+
queryFn = (op) => op[prop];
74
+
} else {
75
+
queryFn = (op) => querySimplePath(op, compiled);
76
+
}
77
+
} else {
78
+
queryFn = (op) => jmespathSearch(op, expression!);
79
+
}
80
+
} else {
81
+
// Detect or process mode
82
+
const mod = await import(modulePath!);
83
+
userFn = mode === 'detect' ? (mod.detect || mod.default) : (mod.process || mod.default);
84
+
}
45
85
46
86
let totalOps = 0;
87
+
let totalBytes = 0;
47
88
let matchCount = 0;
48
-
let totalBytes = 0;
49
-
let matchedBytes = 0;
50
-
const matches: any[] = [];
89
+
const BATCH_SIZE = 1000;
90
+
let matchBatch: any[] = [];
51
91
52
-
await bundle.processBundles(start, end, (op: Operation, position: number, bundleNum: number) => {
53
-
totalOps++;
54
-
const opSize = JSON.stringify(op).length;
55
-
totalBytes += opSize;
92
+
const flushBatch = () => {
93
+
if (matchBatch.length > 0) {
94
+
self.postMessage({ type: 'match-batch', matches: matchBatch } as WorkerMatchBatch);
95
+
matchBatch = [];
96
+
}
97
+
};
98
+
99
+
for (let bundleNum = start; bundleNum <= end; bundleNum++) {
100
+
const bundlePath = `${dir}${bundleNum.toString().padStart(6, '0')}.jsonl.zst`;
56
101
57
-
const labels = detect({ op });
58
-
59
-
if (labels && labels.length > 0) {
60
-
matchCount++;
61
-
matchedBytes += opSize;
102
+
try {
103
+
const compressed = await Bun.file(bundlePath).arrayBuffer();
104
+
const decompressed = Bun.zstdDecompressSync(compressed);
105
+
const text = new TextDecoder().decode(decompressed);
106
+
const lines = text.split('\n');
107
+
108
+
for (let position = 0; position < lines.length; position++) {
109
+
const line = lines[position];
110
+
if (!line.trim()) continue;
111
+
112
+
totalOps++;
113
+
totalBytes += line.length;
114
+
const op = JSON.parse(line);
115
+
116
+
if (mode === 'query') {
117
+
try {
118
+
const result = queryFn!(op);
119
+
120
+
if (result !== null && result !== undefined) {
121
+
matchCount++;
122
+
matchBatch.push({ result });
123
+
if (matchBatch.length >= BATCH_SIZE) flushBatch();
124
+
}
125
+
} catch (error) {}
126
+
} else if (mode === 'detect') {
127
+
const labels = userFn({ op });
128
+
if (labels && labels.length > 0) {
129
+
matchCount++;
130
+
matchBatch.push({ result: { bundle: bundleNum, position, cid: op.cid.slice(-4), labels } });
131
+
if (matchBatch.length >= BATCH_SIZE) flushBatch();
132
+
}
133
+
} else {
134
+
// process mode
135
+
userFn({ op, position, bundle: bundleNum, line });
136
+
}
137
+
138
+
if (totalOps % 10000 === 0) {
139
+
self.postMessage({ type: 'progress', currentBundle: bundleNum, totalOps, totalBytes, matchCount } as WorkerProgress);
140
+
}
141
+
}
62
142
63
-
matches.push({
64
-
bundle: bundleNum,
65
-
position,
66
-
cid: op.cid.slice(-4),
67
-
size: opSize,
68
-
labels,
69
-
});
70
-
}
71
-
72
-
// Send progress every 10000 operations
73
-
if (totalOps % 10000 === 0) {
74
-
self.postMessage({
75
-
type: 'progress',
76
-
totalOps,
77
-
matchCount,
78
-
totalBytes,
79
-
matchedBytes,
80
-
} as WorkerProgress);
143
+
self.postMessage({ type: 'progress', currentBundle: bundleNum, totalOps, totalBytes, matchCount } as WorkerProgress);
144
+
} catch (error) {}
145
+
}
146
+
147
+
flushBatch();
148
+
149
+
let prepareResult: any;
150
+
if (mode !== 'query' && modulePath) {
151
+
const mod = await import(modulePath);
152
+
if (typeof mod.prepare === 'function') {
153
+
try {
154
+
prepareResult = await mod.prepare();
155
+
} catch (error) {}
81
156
}
82
-
});
157
+
}
83
158
84
-
// Send final result
85
-
self.postMessage({
86
-
type: 'result',
87
-
totalOps,
88
-
matchCount,
89
-
totalBytes,
90
-
matchedBytes,
91
-
matches,
92
-
} as WorkerResult);
159
+
self.postMessage({ type: 'result', totalOps, totalBytes, matchCount, data: prepareResult || null } as WorkerResult);
93
160
};
161
+
162
+
interface SimplePath {
163
+
segments: Array<{ type: 'property' | 'index'; value: string | number }>;
164
+
}
165
+
166
+
function compileSimplePath(expression: string): SimplePath {
167
+
const segments: Array<{ type: 'property' | 'index'; value: string | number }> = [];
168
+
let current = '';
169
+
let i = 0;
170
+
171
+
while (i < expression.length) {
172
+
const char = expression[i];
173
+
if (char === '.') {
174
+
if (current) segments.push({ type: 'property', value: current });
175
+
current = '';
176
+
i++;
177
+
} else if (char === '[') {
178
+
if (current) segments.push({ type: 'property', value: current });
179
+
current = '';
180
+
i++;
181
+
let index = '';
182
+
while (i < expression.length && expression[i] !== ']') {
183
+
index += expression[i];
184
+
i++;
185
+
}
186
+
segments.push({ type: 'index', value: parseInt(index) });
187
+
i++;
188
+
} else {
189
+
current += char;
190
+
i++;
191
+
}
192
+
}
193
+
if (current) segments.push({ type: 'property', value: current });
194
+
return { segments };
195
+
}
196
+
197
+
function querySimplePath(obj: any, compiled: SimplePath): any {
198
+
let current = obj;
199
+
for (const segment of compiled.segments) {
200
+
if (current == null) return null;
201
+
if (segment.type === 'property') {
202
+
current = current[segment.value];
203
+
} else {
204
+
if (Array.isArray(current)) {
205
+
current = current[segment.value as number];
206
+
} else {
207
+
return null;
208
+
}
209
+
}
210
+
}
211
+
return current;
212
+
}
+77
tests/commands.test.ts
+77
tests/commands.test.ts
···
1
+
import { describe, test, expect, beforeEach } from 'bun:test';
2
+
import { PLCBundle } from '../src/plcbundle';
3
+
import { TEMP_DIR, createMockIndex, createMockOperations } from './setup';
4
+
5
+
describe('CLI Commands', () => {
6
+
let detectModulePath: string;
7
+
let processModulePath: string;
8
+
9
+
beforeEach(async () => {
10
+
const bundle = new PLCBundle(TEMP_DIR);
11
+
const mockIndex = createMockIndex();
12
+
await bundle.saveIndex(mockIndex);
13
+
14
+
// Create test bundles
15
+
for (let i = 1; i <= 3; i++) {
16
+
const operations = createMockOperations(100);
17
+
const jsonl = operations.map(op => JSON.stringify(op)).join('\n') + '\n';
18
+
const compressed = Bun.zstdCompressSync(new TextEncoder().encode(jsonl));
19
+
await Bun.write(bundle.getBundlePath(i), compressed);
20
+
}
21
+
22
+
// Create test modules - use absolute paths
23
+
detectModulePath = `${process.cwd()}/${TEMP_DIR}/test-detect.ts`;
24
+
await Bun.write(detectModulePath, `
25
+
export function detect({ op }) {
26
+
return op.did.includes('test') ? ['test'] : [];
27
+
}
28
+
`);
29
+
30
+
processModulePath = `${process.cwd()}/${TEMP_DIR}/test-process.ts`;
31
+
await Bun.write(processModulePath, `
32
+
let count = 0;
33
+
export function process({ op }) {
34
+
count++;
35
+
}
36
+
`);
37
+
});
38
+
39
+
describe('detect command', () => {
40
+
test('module can be imported and has detect function', async () => {
41
+
const mod = await import(detectModulePath);
42
+
expect(mod.detect).toBeDefined();
43
+
expect(typeof mod.detect).toBe('function');
44
+
45
+
const result = mod.detect({ op: { did: 'test123' } });
46
+
expect(Array.isArray(result)).toBe(true);
47
+
});
48
+
});
49
+
50
+
describe('process command', () => {
51
+
test('module can be imported and has process function', async () => {
52
+
const mod = await import(processModulePath);
53
+
expect(mod.process).toBeDefined();
54
+
expect(typeof mod.process).toBe('function');
55
+
});
56
+
});
57
+
58
+
describe('module loading', () => {
59
+
test('detect module returns labels array', async () => {
60
+
const mod = await import(detectModulePath);
61
+
62
+
const result1 = mod.detect({ op: { did: 'did:plc:test123' } });
63
+
expect(result1).toContain('test');
64
+
65
+
const result2 = mod.detect({ op: { did: 'did:plc:abc' } });
66
+
expect(result2).toEqual([]);
67
+
});
68
+
69
+
test('process module executes without errors', async () => {
70
+
const mod = await import(processModulePath);
71
+
72
+
// Should not throw
73
+
mod.process({ op: { did: 'test' }, position: 0, bundle: 1, line: '{}' });
74
+
expect(true).toBe(true);
75
+
});
76
+
});
77
+
});
+78
tests/common.test.ts
+78
tests/common.test.ts
···
1
+
import { describe, test, expect, beforeEach } from 'bun:test';
2
+
import { parseProcessArgs, parseBundleSelection } from '../src/cmds/common';
3
+
import { PLCBundle } from '../src/plcbundle';
4
+
import { TEMP_DIR, createMockIndex } from './setup';
5
+
6
+
describe('Common CLI Logic', () => {
7
+
describe('parseProcessArgs', () => {
8
+
test('parses positional arguments', () => {
9
+
const { values, positionals } = parseProcessArgs(['./detect.ts', '--threads', '4']);
10
+
11
+
expect(positionals[0]).toBe('./detect.ts');
12
+
expect(values.threads).toBe('4');
13
+
});
14
+
15
+
test('parses boolean flags', () => {
16
+
const { values } = parseProcessArgs(['./detect.ts', '--silent', '--flush']);
17
+
18
+
expect(values.silent).toBe(true);
19
+
expect(values.flush).toBe(true);
20
+
});
21
+
22
+
test('handles short flags', () => {
23
+
const { values } = parseProcessArgs(['./detect.ts', '-s']);
24
+
25
+
expect(values.s).toBe(true);
26
+
});
27
+
});
28
+
29
+
describe('parseBundleSelection', () => {
30
+
let bundle: PLCBundle;
31
+
32
+
beforeEach(async () => {
33
+
bundle = new PLCBundle(TEMP_DIR);
34
+
const mockIndex = createMockIndex();
35
+
await bundle.saveIndex(mockIndex);
36
+
});
37
+
38
+
test('defaults to all bundles', async () => {
39
+
const { start, end } = await parseBundleSelection({}, bundle);
40
+
41
+
expect(start).toBe(1);
42
+
expect(end).toBe(3); // From mock index
43
+
});
44
+
45
+
test('parses single bundle', async () => {
46
+
const { start, end } = await parseBundleSelection({ bundles: '2' }, bundle);
47
+
48
+
expect(start).toBe(2);
49
+
expect(end).toBe(2);
50
+
});
51
+
52
+
test('parses bundle range', async () => {
53
+
const { start, end } = await parseBundleSelection({ bundles: '1-3' }, bundle);
54
+
55
+
expect(start).toBe(1);
56
+
expect(end).toBe(3);
57
+
});
58
+
59
+
test('throws error for invalid range', async () => {
60
+
try {
61
+
await parseBundleSelection({ bundles: '1-999' }, bundle);
62
+
expect(true).toBe(false); // Should throw
63
+
} catch (error) {
64
+
expect(error).toBeDefined();
65
+
expect((error as Error).message).toContain('Invalid bundle range');
66
+
}
67
+
});
68
+
69
+
test('throws error for invalid format', async () => {
70
+
try {
71
+
await parseBundleSelection({ bundles: 'invalid' }, bundle);
72
+
expect(true).toBe(false); // Should throw
73
+
} catch (error) {
74
+
expect(error).toBeDefined();
75
+
}
76
+
});
77
+
});
78
+
});
+101
tests/multithread.test.ts
+101
tests/multithread.test.ts
···
1
+
import { describe, test, expect, beforeEach } from 'bun:test';
2
+
import { PLCBundle } from '../src/plcbundle';
3
+
import { TEMP_DIR, createMockIndex, createMockOperations } from './setup';
4
+
5
+
describe('Multi-threaded Processing', () => {
6
+
let bundle: PLCBundle;
7
+
let modulePath: string;
8
+
9
+
beforeEach(async () => {
10
+
bundle = new PLCBundle(TEMP_DIR);
11
+
12
+
const mockIndex = createMockIndex();
13
+
await bundle.saveIndex(mockIndex);
14
+
15
+
// Create test bundles
16
+
for (let i = 1; i <= 5; i++) {
17
+
const operations = createMockOperations(100);
18
+
const jsonl = operations.map(op => JSON.stringify(op)).join('\n') + '\n';
19
+
const compressed = Bun.zstdCompressSync(new TextEncoder().encode(jsonl));
20
+
await Bun.write(bundle.getBundlePath(i), compressed);
21
+
}
22
+
23
+
// Create test module - use absolute path
24
+
modulePath = `${process.cwd()}/${TEMP_DIR}/test-module.ts`;
25
+
await Bun.write(modulePath, `
26
+
export function detect({ op }) {
27
+
return op.did.length > 10 ? ['long-did'] : [];
28
+
}
29
+
`);
30
+
});
31
+
32
+
test('module loads correctly', async () => {
33
+
const mod = await import(modulePath);
34
+
expect(mod.detect).toBeDefined();
35
+
});
36
+
37
+
test('single-threaded processing works', async () => {
38
+
const stats = await bundle.processBundles(1, 2, {
39
+
module: modulePath,
40
+
threads: 1,
41
+
});
42
+
43
+
expect(stats.totalOps).toBe(200); // 2 bundles * 100 ops
44
+
expect(stats.totalBytes).toBeGreaterThan(0);
45
+
});
46
+
47
+
test('multi-threaded processing with 2 threads', async () => {
48
+
const stats = await bundle.processBundles(1, 4, {
49
+
module: modulePath,
50
+
threads: 2,
51
+
});
52
+
53
+
expect(stats.totalOps).toBe(400); // 4 bundles * 100 ops
54
+
expect(stats.totalBytes).toBeGreaterThan(0);
55
+
});
56
+
57
+
test('multi-threaded produces same op count as single-threaded', async () => {
58
+
const stats1 = await bundle.processBundles(1, 3, {
59
+
module: modulePath,
60
+
threads: 1,
61
+
});
62
+
63
+
const stats2 = await bundle.processBundles(1, 3, {
64
+
module: modulePath,
65
+
threads: 2,
66
+
});
67
+
68
+
expect(stats1.totalOps).toBe(stats2.totalOps);
69
+
expect(stats1.totalBytes).toBe(stats2.totalBytes);
70
+
});
71
+
72
+
test('progress callback works with multi-threading', async () => {
73
+
let progressCalls = 0;
74
+
75
+
await bundle.processBundles(1, 5, {
76
+
module: modulePath,
77
+
threads: 2,
78
+
onProgress: () => {
79
+
progressCalls++;
80
+
},
81
+
});
82
+
83
+
// May or may not be called depending on threshold
84
+
expect(typeof progressCalls).toBe('number');
85
+
});
86
+
87
+
test('validates threads parameter', async () => {
88
+
let errorThrown = false;
89
+
90
+
try {
91
+
await bundle.processBundles(1, 1, () => {}, {
92
+
threads: 4, // Without module - should throw
93
+
});
94
+
} catch (error) {
95
+
errorThrown = true;
96
+
expect((error as Error).message).toContain('module');
97
+
}
98
+
99
+
expect(errorThrown).toBe(true);
100
+
});
101
+
});
+223
-40
tests/processing.test.ts
+223
-40
tests/processing.test.ts
···
51
51
});
52
52
});
53
53
54
+
describe('streamOperations', () => {
55
+
test('streams operations from bundle', async () => {
56
+
const operations = [];
57
+
58
+
for await (const { op, line } of bundle.streamOperations(1)) {
59
+
operations.push(op);
60
+
expect(line).toBeDefined();
61
+
expect(line.length).toBeGreaterThan(0);
62
+
}
63
+
64
+
expect(operations.length).toBe(100);
65
+
expect(operations[0].did).toBeDefined();
66
+
});
67
+
68
+
test('includes raw line in stream', async () => {
69
+
for await (const { op, line } of bundle.streamOperations(1)) {
70
+
expect(typeof line).toBe('string');
71
+
expect(line.trim().length).toBeGreaterThan(0);
72
+
73
+
// Line should be valid JSON
74
+
const parsed = JSON.parse(line);
75
+
expect(parsed.did).toBe(op.did);
76
+
break; // Just check first
77
+
}
78
+
});
79
+
});
80
+
54
81
describe('processBundles', () => {
55
-
test('calls callback for each operation', async () => {
82
+
test('processes with callback function', async () => {
56
83
const callback = mock(() => {});
57
84
58
-
await bundle.processBundles(1, 1, callback, { threads: 1 });
85
+
const stats = await bundle.processBundles(1, 1, callback);
59
86
60
-
// Should have been called for each operation (100 in our mock)
61
87
expect(callback).toHaveBeenCalled();
62
88
expect(callback.mock.calls.length).toBe(100);
89
+
expect(stats.totalOps).toBe(100);
63
90
});
64
91
65
-
test('tracks statistics', async () => {
92
+
test('callback receives all parameters', async () => {
93
+
const callback = mock((op: any, position: number, bundleNum: number, line: string) => {
94
+
expect(op).toBeDefined();
95
+
expect(typeof position).toBe('number');
96
+
expect(typeof bundleNum).toBe('number');
97
+
expect(typeof line).toBe('string');
98
+
});
99
+
100
+
await bundle.processBundles(1, 1, callback);
101
+
102
+
expect(callback).toHaveBeenCalled();
103
+
});
104
+
105
+
test('tracks statistics accurately', async () => {
66
106
const callback = mock(() => {});
67
107
68
-
const stats = await bundle.processBundles(1, 1, callback, {
69
-
threads: 1,
70
-
});
108
+
const stats = await bundle.processBundles(1, 1, callback);
71
109
72
-
expect(stats).toBeDefined();
73
-
expect(stats.totalOps).toBe(100); // Our mock has 100 operations
110
+
expect(stats.totalOps).toBe(100);
74
111
expect(stats.totalBytes).toBeGreaterThan(0);
112
+
expect(stats.matchCount).toBe(0);
113
+
expect(stats.matchedBytes).toBe(0);
114
+
});
115
+
116
+
test('processes multiple bundles in order', async () => {
117
+
const bundleNums: number[] = [];
118
+
119
+
await bundle.processBundles(1, 3, (op, position, bundleNum) => {
120
+
bundleNums.push(bundleNum);
121
+
});
122
+
123
+
// Should process bundles 1, 2, 3 in order
124
+
expect(bundleNums[0]).toBe(1);
125
+
expect(bundleNums[99]).toBe(1); // Last of bundle 1
126
+
expect(bundleNums[100]).toBe(2); // First of bundle 2
127
+
expect(bundleNums[299]).toBe(3); // Last of bundle 3
75
128
});
76
129
77
130
test('supports progress callback', async () => {
78
131
const progressCallback = mock(() => {});
79
-
const processCallback = mock(() => {});
80
132
81
-
await bundle.processBundles(1, 1, processCallback, {
133
+
await bundle.processBundles(1, 1, () => {}, {
82
134
onProgress: progressCallback,
83
135
});
84
136
85
-
// Callback was called
86
-
expect(processCallback).toHaveBeenCalled();
137
+
// Progress should not be called for only 100 operations (threshold is 10,000)
138
+
expect(progressCallback.mock.calls.length).toBe(0);
87
139
});
88
140
89
-
test('respects thread option', async () => {
90
-
const callback = mock(() => {});
141
+
test('calls progress callback at threshold', async () => {
142
+
// Create larger bundle to trigger progress
143
+
const largeOps = createMockOperations(15000);
144
+
const jsonl = largeOps.map(op => JSON.stringify(op)).join('\n') + '\n';
145
+
const compressed = Bun.zstdCompressSync(new TextEncoder().encode(jsonl));
146
+
await Bun.write(bundle.getBundlePath(10), compressed);
91
147
92
-
const stats1 = await bundle.processBundles(1, 1, callback, { threads: 1 });
148
+
const progressCallback = mock(() => {});
93
149
94
-
const callback2 = mock(() => {});
95
-
const stats4 = await bundle.processBundles(1, 1, callback2, { threads: 4 });
150
+
await bundle.processBundles(10, 10, () => {}, {
151
+
onProgress: progressCallback,
152
+
});
96
153
97
-
// Both should work and process same number of operations
98
-
expect(stats1.totalOps).toBe(100);
99
-
expect(stats4.totalOps).toBe(100);
154
+
// Should be called at least once (at 10,000 ops)
155
+
expect(progressCallback.mock.calls.length).toBeGreaterThan(0);
100
156
});
101
157
102
-
test('processes multiple bundles', async () => {
103
-
const callback = mock(() => {});
158
+
test('line length matches original JSONL', async () => {
159
+
const sizes: number[] = [];
160
+
161
+
await bundle.processBundles(1, 1, (op, position, bundleNum, line) => {
162
+
sizes.push(line.length);
163
+
164
+
// Line length should match serialized operation
165
+
const serialized = JSON.stringify(op);
166
+
expect(line.length).toBeGreaterThanOrEqual(serialized.length - 10); // Allow small variance
167
+
});
168
+
169
+
expect(sizes.length).toBe(100);
170
+
expect(sizes.every(s => s > 0)).toBe(true);
171
+
});
172
+
173
+
test('supports async callbacks', async () => {
174
+
const callback = mock(async (op: any) => {
175
+
await new Promise(resolve => setTimeout(resolve, 1));
176
+
});
104
177
105
-
const stats = await bundle.processBundles(1, 3, callback, { threads: 1 });
178
+
const stats = await bundle.processBundles(1, 1, callback);
179
+
180
+
expect(callback).toHaveBeenCalled();
181
+
expect(stats.totalOps).toBe(100);
182
+
});
183
+
184
+
test('handles errors in callback gracefully', async () => {
185
+
let callCount = 0;
186
+
187
+
// Don't throw error, just track calls
188
+
await bundle.processBundles(1, 1, () => {
189
+
callCount++;
190
+
// Don't throw - just count
191
+
});
106
192
107
-
// Should process all 3 bundles (300 operations total)
108
-
expect(stats.totalOps).toBe(300);
109
-
expect(callback.mock.calls.length).toBe(300);
193
+
expect(callCount).toBe(100);
110
194
});
111
195
});
112
196
113
-
describe('streamOperations', () => {
114
-
test('streams operations from bundle', async () => {
115
-
const operations = [];
197
+
describe('processBundles with module path', () => {
198
+
test('loads module and calls function', async () => {
199
+
// Create a test module with absolute path
200
+
const testModulePath = `${process.cwd()}/${TEMP_DIR}/test-module.ts`;
201
+
await Bun.write(testModulePath, `
202
+
export function detect({ op }) {
203
+
return op.did.startsWith('did:plc:') ? ['test'] : [];
204
+
}
205
+
`);
206
+
207
+
const stats = await bundle.processBundles(1, 1, {
208
+
module: testModulePath,
209
+
});
116
210
117
-
for await (const op of bundle.streamOperations(1)) {
118
-
operations.push(op);
211
+
expect(stats.totalOps).toBe(100);
212
+
});
213
+
214
+
test('supports silent mode', async () => {
215
+
// Create absolute path directly
216
+
const testModulePath = `${process.cwd()}/${TEMP_DIR}/noisy-module.ts`;
217
+
await Bun.write(testModulePath, `
218
+
export function detect({ op }) {
219
+
console.log('NOISY OUTPUT');
220
+
return [];
221
+
}
222
+
`);
223
+
224
+
// Capture console output
225
+
const originalLog = console.log;
226
+
const originalError = console.error;
227
+
let logOutput = '';
228
+
229
+
console.log = (...args: any[]) => {
230
+
logOutput += args.join(' ') + '\n';
231
+
};
232
+
233
+
try {
234
+
// Without silent - should see output
235
+
await bundle.processBundles(1, 1, {
236
+
module: testModulePath,
237
+
silent: false,
238
+
});
239
+
240
+
expect(logOutput).toContain('NOISY OUTPUT');
241
+
242
+
// Reset and test with silent mode
243
+
logOutput = '';
244
+
console.log = () => {};
245
+
console.error = () => {};
246
+
247
+
await bundle.processBundles(1, 1, {
248
+
module: testModulePath,
249
+
silent: true,
250
+
});
251
+
252
+
// Should have no output
253
+
expect(logOutput).toBe('');
254
+
} finally {
255
+
console.log = originalLog;
256
+
console.error = originalError;
119
257
}
258
+
});
259
+
260
+
test('loads module and calls function', async () => {
261
+
// Create absolute path
262
+
const testModulePath = `${process.cwd()}/${TEMP_DIR}/test-module.ts`;
263
+
await Bun.write(testModulePath, `
264
+
export function detect({ op }) {
265
+
return op.did.startsWith('did:plc:') ? ['test'] : [];
266
+
}
267
+
`);
120
268
121
-
expect(operations.length).toBe(100);
122
-
expect(operations[0].did).toBeDefined();
269
+
const stats = await bundle.processBundles(1, 1, {
270
+
module: testModulePath,
271
+
});
272
+
273
+
expect(stats.totalOps).toBe(100);
123
274
});
124
275
125
-
test('operations have required fields', async () => {
126
-
for await (const op of bundle.streamOperations(1)) {
127
-
expect(op.did).toBeDefined();
128
-
expect(op.cid).toBeDefined();
129
-
expect(op.createdAt).toBeDefined();
130
-
break; // Just check first one
276
+
test('throws error for multi-threading without module', async () => {
277
+
let errorThrown = false;
278
+
let errorMessage = '';
279
+
280
+
try {
281
+
await bundle.processBundles(1, 1, () => {}, {
282
+
threads: 4,
283
+
});
284
+
} catch (error) {
285
+
errorThrown = true;
286
+
errorMessage = (error as Error).message;
131
287
}
288
+
289
+
expect(errorThrown).toBe(true);
290
+
expect(errorMessage).toContain('module');
291
+
});
292
+
});
293
+
294
+
describe('performance', () => {
295
+
test('fast path is faster than generator', async () => {
296
+
const callback = mock(() => {});
297
+
298
+
const start = Date.now();
299
+
await bundle.processBundles(1, 3, callback);
300
+
const duration = Date.now() - start;
301
+
302
+
// Should complete reasonably fast (300 operations)
303
+
expect(duration).toBeLessThan(1000); // Less than 1 second
304
+
expect(callback.mock.calls.length).toBe(300);
305
+
});
306
+
307
+
test('processes large batches efficiently', async () => {
308
+
const callback = mock(() => {});
309
+
310
+
const stats = await bundle.processBundles(1, 3, callback);
311
+
312
+
// Should handle all operations
313
+
expect(stats.totalOps).toBe(300);
314
+
expect(stats.totalBytes).toBeGreaterThan(0);
132
315
});
133
316
});
134
317
});