+34
-1
src/index.ts
+34
-1
src/index.ts
···
1
+
/**
2
+
* plcbundle - Zero-dependency library for working with PLC bundle archives
3
+
*
4
+
* This module provides a Bun-native implementation for reading, processing,
5
+
* and cloning PLC (Placeholder DID) bundle archives. It leverages Bun's native
6
+
* features for optimal performance.
7
+
*
8
+
* @example
9
+
* ```ts
10
+
* import { PLCBundle } from "@yourscope/plcbundle-bun";
11
+
*
12
+
* // Create bundle instance
13
+
* const bundle = new PLCBundle("./bundles");
14
+
*
15
+
* // Clone from remote
16
+
* await bundle.clone("https://plcbundle.atscan.net", {
17
+
* bundles: "1-100",
18
+
* threads: 8
19
+
* });
20
+
*
21
+
* // Process operations
22
+
* await bundle.processBundles(1, 10, (op, pos, num) => {
23
+
* console.log(op.did);
24
+
* });
25
+
* ```
26
+
*
27
+
* @module
28
+
*/
29
+
1
30
export { PLCBundle } from './plcbundle';
2
31
export type {
3
32
BundleIndex,
4
33
BundleMetadata,
5
34
Operation,
6
-
ProcessCallback
35
+
ProcessCallback,
36
+
ProcessStats,
37
+
ProcessOptions,
38
+
CloneOptions,
39
+
CloneStats,
7
40
} from './types';
+284
-13
src/plcbundle.ts
+284
-13
src/plcbundle.ts
···
19
19
}
20
20
21
21
/**
22
-
* Bundle reader and processor for plcbundle format
22
+
* Main class for reading and processing PLC bundle archives.
23
+
*
24
+
* This class provides methods for:
25
+
* - Cloning bundles from remote repositories
26
+
* - Reading and verifying local bundles
27
+
* - Streaming operations from bundles
28
+
* - Processing bundles with custom callbacks
29
+
*
30
+
* All operations use Bun's native features for optimal performance.
31
+
*
32
+
* @example Basic usage
33
+
* ```ts
34
+
* const bundle = new PLCBundle('./my-bundles');
35
+
*
36
+
* // Get repository information
37
+
* const stats = await bundle.getStats();
38
+
* console.log(`Repository has ${stats.lastBundle} bundles`);
39
+
*
40
+
* // Stream operations from a bundle
41
+
* for await (const op of bundle.streamOperations(1)) {
42
+
* console.log(op.did);
43
+
* }
44
+
* ```
45
+
*
46
+
* @example Clone from remote
47
+
* ```ts
48
+
* const bundle = new PLCBundle('./bundles');
49
+
*
50
+
* await bundle.clone('https://plcbundle.atscan.net', {
51
+
* bundles: '1-100',
52
+
* threads: 8,
53
+
* verify: true,
54
+
* onProgress: (stats) => {
55
+
* console.log(`${stats.downloadedBundles}/${stats.totalBundles}`);
56
+
* }
57
+
* });
58
+
* ```
59
+
*
60
+
* @example Process with callback
61
+
* ```ts
62
+
* await bundle.processBundles(1, 10, (op, pos, num) => {
63
+
* if (op.did.startsWith('did:plc:')) {
64
+
* console.log(`Found DID: ${op.did}`);
65
+
* }
66
+
* }, {
67
+
* threads: 4,
68
+
* onProgress: (stats) => console.log(`${stats.totalOps} ops`)
69
+
* });
70
+
* ```
23
71
*/
24
72
export class PLCBundle {
25
73
private dir: string;
26
74
private indexPath: string;
27
75
private cachedIndex?: BundleIndex;
28
76
77
+
/**
78
+
* Create a new PLCBundle instance.
79
+
*
80
+
* @param dir - Directory containing bundle files (default: './')
81
+
* @param indexPath - Path to the index file (default: `${dir}/plc_bundles.json`)
82
+
*
83
+
* @example
84
+
* ```ts
85
+
* // Use default directory
86
+
* const bundle1 = new PLCBundle();
87
+
*
88
+
* // Specify custom directory
89
+
* const bundle2 = new PLCBundle('./my-bundles');
90
+
*
91
+
* // Custom directory and index path
92
+
* const bundle3 = new PLCBundle('./bundles', './custom-index.json');
93
+
* ```
94
+
*/
29
95
constructor(dir: string = './', indexPath?: string) {
30
96
this.dir = dir.endsWith('/') ? dir : `${dir}/`;
31
97
this.indexPath = indexPath || `${this.dir}plc_bundles.json`;
32
98
}
33
99
34
100
/**
35
-
* Load and cache the bundle index
101
+
* Load the bundle index from disk.
102
+
*
103
+
* The index is cached in memory after first load. Use `refresh` parameter
104
+
* to force reloading from disk.
105
+
*
106
+
* @param refresh - If true, reload from disk even if cached (default: false)
107
+
* @returns Promise resolving to the bundle index
108
+
* @throws Error if index file cannot be read or parsed
109
+
*
110
+
* @example
111
+
* ```ts
112
+
* // Load index (uses cache if available)
113
+
* const index = await bundle.loadIndex();
114
+
*
115
+
* // Force reload from disk
116
+
* const freshIndex = await bundle.loadIndex(true);
117
+
* ```
36
118
*/
37
119
async loadIndex(refresh = false): Promise<BundleIndex> {
38
120
if (!refresh && this.cachedIndex) {
···
45
127
}
46
128
47
129
/**
48
-
* Save the bundle index
130
+
* Save a bundle index to disk.
131
+
*
132
+
* Writes the index as formatted JSON and updates the in-memory cache.
133
+
* The index is written atomically using Bun's file API.
134
+
*
135
+
* @param index - The bundle index to save
136
+
* @returns Promise that resolves when save is complete
137
+
*
138
+
* @example
139
+
* ```ts
140
+
* const index = await bundle.loadIndex();
141
+
* index.last_bundle = 150;
142
+
* await bundle.saveIndex(index);
143
+
* ```
49
144
*/
50
145
async saveIndex(index: BundleIndex): Promise<void> {
51
146
await Bun.write(this.indexPath, JSON.stringify(index, null, 2));
···
53
148
}
54
149
55
150
/**
56
-
* Get metadata for a specific bundle
151
+
* Get metadata for a specific bundle.
152
+
*
153
+
* @param bundleNum - The bundle number to retrieve metadata for
154
+
* @returns Promise resolving to bundle metadata, or undefined if not found
155
+
*
156
+
* @example
157
+
* ```ts
158
+
* const metadata = await bundle.getMetadata(42);
159
+
* if (metadata) {
160
+
* console.log(`Bundle ${metadata.bundle_number} has ${metadata.operation_count} operations`);
161
+
* }
162
+
* ```
57
163
*/
58
164
async getMetadata(bundleNum: number): Promise<BundleMetadata | undefined> {
59
165
const index = await this.loadIndex();
···
61
167
}
62
168
63
169
/**
64
-
* Get bundle file path
170
+
* Get the file path for a specific bundle.
171
+
*
172
+
* Bundles are named with zero-padded 6-digit numbers (e.g., `000042.jsonl.zst`).
173
+
*
174
+
* @param bundleNum - The bundle number
175
+
* @returns Full path to the bundle file
176
+
*
177
+
* @example
178
+
* ```ts
179
+
* const path = bundle.getBundlePath(42);
180
+
* // Returns: "./bundles/000042.jsonl.zst"
181
+
* ```
65
182
*/
66
183
getBundlePath(bundleNum: number): string {
67
184
return `${this.dir}${bundleNum.toString().padStart(6, '0')}.jsonl.zst`;
68
185
}
69
186
70
187
/**
71
-
* Read and decompress a bundle
188
+
* Read and decompress a bundle file.
189
+
*
190
+
* Uses Bun's native zstd decompression for optimal performance.
191
+
*
192
+
* @param bundleNum - The bundle number to read
193
+
* @returns Promise resolving to the decompressed JSONL content as a string
194
+
* @throws Error if bundle file cannot be read or decompressed
195
+
*
196
+
* @example
197
+
* ```ts
198
+
* const jsonl = await bundle.readBundle(1);
199
+
* console.log(`Bundle size: ${jsonl.length} bytes`);
200
+
* ```
72
201
*/
73
202
async readBundle(bundleNum: number): Promise<string> {
74
203
const path = this.getBundlePath(bundleNum);
···
78
207
}
79
208
80
209
/**
81
-
* Parse operations from bundle content
210
+
* Parse operations from JSONL content.
211
+
*
212
+
* @param content - JSONL string with one operation per line
213
+
* @returns Array of parsed operations
214
+
*
215
+
* @example
216
+
* ```ts
217
+
* const jsonl = await bundle.readBundle(1);
218
+
* const operations = bundle.parseOperations(jsonl);
219
+
* console.log(`Parsed ${operations.length} operations`);
220
+
* ```
82
221
*/
83
222
parseOperations(content: string): Operation[] {
84
223
return content
···
88
227
}
89
228
90
229
/**
91
-
* Stream operations from a bundle (memory efficient)
230
+
* Stream operations from a bundle (memory efficient).
231
+
*
232
+
* This async generator yields operations one at a time, which is more
233
+
* memory efficient than loading all operations at once.
234
+
*
235
+
* @param bundleNum - The bundle number to stream from
236
+
* @yields Operations from the bundle
237
+
*
238
+
* @example
239
+
* ```ts
240
+
* for await (const op of bundle.streamOperations(1)) {
241
+
* console.log(op.did);
242
+
* if (someCondition) break; // Can stop early
243
+
* }
244
+
* ```
92
245
*/
93
246
async *streamOperations(bundleNum: number): AsyncGenerator<Operation> {
94
247
const content = await this.readBundle(bundleNum);
···
102
255
}
103
256
104
257
/**
105
-
* Process multiple bundles with a callback (supports multi-threading)
258
+
* Process multiple bundles with a callback function.
259
+
*
260
+
* Supports both single-threaded and multi-threaded processing via {@link ProcessOptions}.
261
+
* Operations are processed in chronological order.
262
+
*
263
+
* @param start - First bundle number to process (inclusive)
264
+
* @param end - Last bundle number to process (inclusive)
265
+
* @param callback - Function called for each operation
266
+
* @param options - Processing options (threads, progress callback)
267
+
* @returns Promise resolving to processing statistics
268
+
*
269
+
* @example Single-threaded
270
+
* ```ts
271
+
* await bundle.processBundles(1, 10, (op, pos, num) => {
272
+
* console.log(`Bundle ${num}, pos ${pos}: ${op.did}`);
273
+
* });
274
+
* ```
275
+
*
276
+
* @example Multi-threaded with progress
277
+
* ```ts
278
+
* await bundle.processBundles(1, 100, (op) => {
279
+
* // Process operation
280
+
* }, {
281
+
* threads: 4,
282
+
* onProgress: (stats) => {
283
+
* console.log(`Processed ${stats.totalOps} operations`);
284
+
* }
285
+
* });
286
+
* ```
106
287
*/
107
288
async processBundles(
108
289
start: number,
···
168
349
}
169
350
170
351
/**
171
-
* Clone bundles from a remote repository
352
+
* Clone bundles from a remote repository.
353
+
*
354
+
* Downloads bundles via HTTP, verifies hashes, and saves progress periodically.
355
+
* Supports resuming interrupted downloads - bundles that already exist and
356
+
* pass verification will be skipped.
357
+
*
358
+
* Progress is automatically saved every 5 seconds and on completion.
359
+
*
360
+
* @param remoteUrl - Base URL of the remote bundle repository
361
+
* @param options - Clone options (bundles, threads, verification, callbacks)
362
+
* @returns Promise resolving to clone statistics
363
+
* @throws Error if remote is unreachable or bundle range is invalid
364
+
*
365
+
* @example Clone all bundles
366
+
* ```ts
367
+
* await bundle.clone('https://plcbundle.atscan.net', {
368
+
* threads: 8,
369
+
* verify: true
370
+
* });
371
+
* ```
372
+
*
373
+
* @example Clone specific range with progress
374
+
* ```ts
375
+
* await bundle.clone('https://plcbundle.atscan.net', {
376
+
* bundles: '1-100',
377
+
* threads: 4,
378
+
* onProgress: (stats) => {
379
+
* const pct = (stats.downloadedBytes / stats.totalBytes * 100).toFixed(1);
380
+
* console.log(`${pct}% complete`);
381
+
* }
382
+
* });
383
+
* ```
384
+
*
385
+
* @example Resume interrupted download
386
+
* ```ts
387
+
* // First run - interrupted
388
+
* await bundle.clone('https://plcbundle.atscan.net', { bundles: '1-1000' });
389
+
*
390
+
* // Second run - automatically resumes
391
+
* await bundle.clone('https://plcbundle.atscan.net', { bundles: '1-1000' });
392
+
* ```
172
393
*/
173
394
async clone(
174
395
remoteUrl: string,
···
398
619
}
399
620
400
621
/**
401
-
* Verify bundle integrity
622
+
* Verify the integrity of a bundle.
623
+
*
624
+
* Checks both the compressed file hash and the content hash against
625
+
* the values stored in the bundle index.
626
+
*
627
+
* Uses Bun's native SHA-256 hasher for optimal performance.
628
+
*
629
+
* @param bundleNum - The bundle number to verify
630
+
* @returns Promise resolving to verification result
631
+
*
632
+
* @example
633
+
* ```ts
634
+
* const result = await bundle.verifyBundle(42);
635
+
* if (result.valid) {
636
+
* console.log('Bundle is valid');
637
+
* } else {
638
+
* console.error('Verification failed:');
639
+
* result.errors.forEach(err => console.error(` - ${err}`));
640
+
* }
641
+
* ```
402
642
*/
403
643
async verifyBundle(bundleNum: number): Promise<{ valid: boolean; errors: string[] }> {
404
644
const metadata = await this.getMetadata(bundleNum);
···
429
669
}
430
670
431
671
/**
432
-
* Calculate chain hash
672
+
* Calculate a chain hash linking bundles together.
673
+
*
674
+
* The chain hash ensures bundles form an unbroken chain and haven't
675
+
* been tampered with. Genesis bundles use a special hash format.
676
+
*
677
+
* @param parentHash - Hash of the parent bundle (empty for genesis)
678
+
* @param contentHash - Hash of this bundle's content
679
+
* @param isGenesis - Whether this is the first bundle in the chain
680
+
* @returns The calculated chain hash as a hex string
681
+
*
682
+
* @example
683
+
* ```ts
684
+
* // Genesis bundle
685
+
* const genesisHash = bundle.calculateChainHash('', contentHash, true);
686
+
*
687
+
* // Subsequent bundle
688
+
* const chainHash = bundle.calculateChainHash(parentHash, contentHash, false);
689
+
* ```
433
690
*/
434
691
calculateChainHash(parentHash: string, contentHash: string, isGenesis: boolean): string {
435
692
const input = isGenesis
···
440
697
}
441
698
442
699
/**
443
-
* Get bundle statistics
700
+
* Get repository statistics.
701
+
*
702
+
* Provides a quick overview of the bundle repository without loading
703
+
* all bundle metadata.
704
+
*
705
+
* @returns Promise resolving to repository statistics
706
+
*
707
+
* @example
708
+
* ```ts
709
+
* const stats = await bundle.getStats();
710
+
* console.log(`Version: ${stats.version}`);
711
+
* console.log(`Bundles: ${stats.totalBundles}`);
712
+
* console.log(`Size: ${(stats.totalSize / 1e9).toFixed(2)} GB`);
713
+
* console.log(`Updated: ${stats.updatedAt}`);
714
+
* ```
444
715
*/
445
716
async getStats(): Promise<{
446
717
version: string;
+155
src/types.ts
+155
src/types.ts
···
1
+
/**
2
+
* Type definitions for plcbundle library
3
+
*
4
+
* This module contains all TypeScript type definitions used throughout
5
+
* the plcbundle library.
6
+
*
7
+
* @module
8
+
*/
9
+
10
+
/**
11
+
* Metadata for a single bundle in the repository.
12
+
*
13
+
* Contains information about the bundle's contents, hashes for verification,
14
+
* and temporal boundaries.
15
+
*/
1
16
export interface BundleMetadata {
17
+
/** Sequential number identifying this bundle (e.g., 1, 2, 3...) */
2
18
bundle_number: number;
19
+
20
+
/** ISO 8601 timestamp of the first operation in this bundle */
3
21
start_time: string;
22
+
23
+
/** ISO 8601 timestamp of the last operation in this bundle */
4
24
end_time: string;
25
+
26
+
/** Total number of PLC operations contained in this bundle */
5
27
operation_count: number;
28
+
29
+
/** Number of unique DIDs referenced in this bundle */
6
30
did_count: number;
31
+
32
+
/** Chain hash linking this bundle to its predecessor */
7
33
hash: string;
34
+
35
+
/** SHA-256 hash of the uncompressed JSONL content */
8
36
content_hash: string;
37
+
38
+
/** Chain hash of the previous bundle (empty string for genesis bundle) */
9
39
parent: string;
40
+
41
+
/** SHA-256 hash of the compressed .jsonl.zst file */
10
42
compressed_hash: string;
43
+
44
+
/** Size of the compressed bundle file in bytes */
11
45
compressed_size: number;
46
+
47
+
/** Size of the uncompressed JSONL content in bytes */
12
48
uncompressed_size: number;
49
+
50
+
/** Cursor for fetching subsequent operations (end_time of previous bundle) */
13
51
cursor: string;
52
+
53
+
/** ISO 8601 timestamp when this bundle was created */
14
54
created_at: string;
15
55
}
16
56
57
+
/**
58
+
* Index file containing metadata for all bundles in a repository.
59
+
*
60
+
* This is the main entry point for discovering available bundles.
61
+
* Located at `plc_bundles.json` in the repository root.
62
+
*/
17
63
export interface BundleIndex {
64
+
/** Version of the index format (currently "1.0") */
18
65
version: string;
66
+
67
+
/** Bundle number of the most recent bundle */
19
68
last_bundle: number;
69
+
70
+
/** ISO 8601 timestamp when the index was last updated */
20
71
updated_at: string;
72
+
73
+
/** Total size of all compressed bundles in bytes */
21
74
total_size_bytes: number;
75
+
76
+
/** Array of metadata for each bundle, sorted by bundle_number */
22
77
bundles: BundleMetadata[];
23
78
}
24
79
80
+
/**
81
+
* A single PLC operation as stored in bundles.
82
+
*
83
+
* Operations represent changes to DID documents in the PLC directory.
84
+
*/
25
85
export interface Operation {
86
+
/** Decentralized Identifier (DID) this operation applies to */
26
87
did: string;
88
+
89
+
/** Content Identifier (CID) of this operation */
27
90
cid: string;
91
+
92
+
/** The actual operation data containing DID document changes */
28
93
operation: any;
94
+
95
+
/** ISO 8601 timestamp when this operation was created */
29
96
createdAt: string;
97
+
98
+
/** Additional fields that may be present in operations */
30
99
[key: string]: any;
31
100
}
32
101
102
+
/**
103
+
* Callback function called for each operation during processing.
104
+
*
105
+
* @param op - The operation being processed
106
+
* @param position - Zero-based position of the operation within its bundle
107
+
* @param bundleNum - The bundle number being processed
108
+
*
109
+
* @example
110
+
* ```ts
111
+
* const callback: ProcessCallback = (op, position, bundleNum) => {
112
+
* if (op.did.startsWith('did:plc:test')) {
113
+
* console.log(`Found test DID at bundle ${bundleNum}, position ${position}`);
114
+
* }
115
+
* };
116
+
* ```
117
+
*/
33
118
export type ProcessCallback = (
34
119
op: Operation,
35
120
position: number,
36
121
bundleNum: number
37
122
) => void | Promise<void>;
38
123
124
+
/**
125
+
* Statistics collected during bundle processing.
126
+
*
127
+
* Tracks the number of operations and bytes processed.
128
+
*/
39
129
export interface ProcessStats {
130
+
/** Total number of operations processed */
40
131
totalOps: number;
132
+
133
+
/** Number of operations that matched criteria (if applicable) */
41
134
matchCount: number;
135
+
136
+
/** Total bytes of operation data processed */
42
137
totalBytes: number;
138
+
139
+
/** Bytes of data for matched operations (if applicable) */
43
140
matchedBytes: number;
44
141
}
45
142
143
+
/**
144
+
* Options for processing bundles.
145
+
*
146
+
* Allows customization of parallel processing and progress reporting.
147
+
*/
46
148
export interface ProcessOptions {
149
+
/** Number of worker threads to use for parallel processing (default: 1) */
47
150
threads?: number;
151
+
152
+
/**
153
+
* Callback invoked periodically to report processing progress.
154
+
* Called approximately every 10,000 operations.
155
+
*
156
+
* @param stats - Current processing statistics
157
+
*/
48
158
onProgress?: (stats: ProcessStats) => void;
49
159
}
50
160
161
+
/**
162
+
* Options for cloning bundles from a remote repository.
163
+
*
164
+
* Controls download behavior, verification, and progress reporting.
165
+
*/
51
166
export interface CloneOptions {
167
+
/** Number of parallel download threads (default: 4) */
52
168
threads?: number;
169
+
170
+
/**
171
+
* Bundle selection specification.
172
+
*
173
+
* Can be:
174
+
* - A single bundle number: `"42"`
175
+
* - A range: `"1-100"`
176
+
* - Undefined to clone all available bundles
177
+
*/
53
178
bundles?: string;
179
+
180
+
/** Whether to verify SHA-256 hashes of downloaded bundles (default: true) */
54
181
verify?: boolean;
182
+
183
+
/**
184
+
* Function to check if cloning should stop (for graceful shutdown).
185
+
*
186
+
* @returns `true` if cloning should stop, `false` to continue
187
+
*/
55
188
shouldStop?: () => boolean;
189
+
190
+
/**
191
+
* Callback invoked to report download progress.
192
+
*
193
+
* @param stats - Current download statistics
194
+
*/
56
195
onProgress?: (stats: CloneStats) => void;
57
196
}
58
197
198
+
/**
199
+
* Statistics collected during bundle cloning.
200
+
*
201
+
* Tracks download progress, including successes, skips, and failures.
202
+
*/
59
203
export interface CloneStats {
204
+
/** Total number of bundles to download */
60
205
totalBundles: number;
206
+
207
+
/** Number of bundles successfully downloaded in this session */
61
208
downloadedBundles: number;
209
+
210
+
/** Number of bundles skipped (already existed and verified) */
62
211
skippedBundles: number;
212
+
213
+
/** Number of bundles that failed to download */
63
214
failedBundles: number;
215
+
216
+
/** Total bytes to download across all bundles */
64
217
totalBytes: number;
218
+
219
+
/** Bytes downloaded so far (including skipped bundles) */
65
220
downloadedBytes: number;
66
221
}