+3
README.md
+3
README.md
···
43
43
<td><code>tid</code>: create and parse TID identifiers</td>
44
44
</tr>
45
45
<tr>
46
+
<td><code>car</code>: read AT Protocol's CAR (content-addressable archive) repositories</td>
47
+
</tr>
48
+
<tr>
46
49
<td><code>cid</code>: create and parse the blessed CIDv1 format</td>
47
50
</tr>
48
51
<tr>
+9
packages/utilities/car/README.md
+9
packages/utilities/car/README.md
+107
packages/utilities/car/lib/atproto-repo.ts
+107
packages/utilities/car/lib/atproto-repo.ts
···
1
+
import * as CBOR from '@atcute/cbor';
2
+
import * as CID from '@atcute/cid';
3
+
4
+
import { fromUint8Array } from './reader.js';
5
+
6
+
const decoder = new TextDecoder();
7
+
8
+
/** A single record produced while iterating over a repository archive. */
export interface RepoEntry {
	/** part of the record's tree key that precedes the `/` separator */
	collection: string;
	/** part of the record's tree key that follows the `/` separator */
	rkey: string;
	/** CBOR-decoded contents of the block the tree key points at */
	record: unknown;
}
13
+
14
+
/**
 * Iterates over every record stored in a repository CAR archive.
 *
 * All blocks of the archive are indexed by their formatted CID first; the single root block is
 * then decoded as the commit and the tree it links to (`commit.data`) is walked.
 *
 * @param buf raw bytes of the CAR archive
 * @returns generator of `{ collection, rkey, record }` entries, in tree-walk order
 * @throws if the archive does not declare exactly one root, or a linked block is absent
 */
export function* iterateAtpCar(buf: Uint8Array): Generator<RepoEntry> {
	// the reader is handed a fresh Uint8Array copy of the input
	const { roots, iterate } = fromUint8Array(new Uint8Array(buf));
	assert(roots.length === 1, `expected only 1 root in the car archive; got=${roots.length}`);

	// index every block so links can be resolved regardless of their position in the archive
	const blocks: BlockMap = new Map();
	for (const { cid, bytes } of iterate()) {
		blocks.set(CID.format(cid), bytes);
	}

	const commit = readObject(blocks, roots[0]) as Commit;

	for (const node of walkEntries(blocks, commit.data)) {
		const [collection, rkey] = node.key.split('/');

		yield { collection, rkey, record: readObject(blocks, node.cid) };
	}
}
31
+
32
+
/**
 * Resolves a CID link against the block index and decodes the block as CBOR.
 *
 * @param map blocks indexed by CID string
 * @param link link whose `$link` value is used as the lookup key
 * @returns the decoded value
 * @throws Error when no block is stored under the link's CID
 */
function readObject(map: BlockMap, link: CBOR.CIDLink): unknown {
	const key = link.$link;
	const block = map.get(key);

	if (block == null) {
		throw new Error(`cid not found in blockmap; cid=${key}`);
	}

	return CBOR.decode(block);
}
42
+
43
+
/**
 * Recursively walks a tree node, yielding the full key and value link of every entry.
 *
 * Order per node: the left subtree (`l`) first, then each entry followed by its own
 * right subtree (`t`).
 *
 * @param map blocks indexed by CID string
 * @param pointer link to the node to walk
 * @returns generator of `{ key, cid }` pairs
 */
function* walkEntries(map: BlockMap, pointer: CBOR.CIDLink): Generator<NodeEntry> {
	const node = readObject(map, pointer) as MstNode;
	const entries = node.e;

	if (node.l !== null) {
		yield* walkEntries(map, node.l);
	}

	// keys are prefix-compressed against the previous entry of the same node
	let previous = '';

	for (const entry of entries) {
		const suffix = decoder.decode(CBOR.fromBytes(entry.k));

		// NOTE(review): `p` is described as a byte count but is applied as a string index here;
		// the two agree as long as keys only contain single-byte characters.
		const key = previous.slice(0, entry.p) + suffix;
		previous = key;

		yield { key, cid: entry.v };

		if (entry.t !== null) {
			yield* walkEntries(map, entry.t);
		}
	}
}
68
+
69
+
/**
 * Throws an `Error` carrying `message` unless `condition` is true.
 * Doubles as a type-level assertion so the checked expression is narrowed afterwards.
 */
function assert(condition: boolean, message: string): asserts condition {
	if (condition) {
		return;
	}

	throw new Error(message);
}
74
+
75
+
/** Raw block bytes indexed by the formatted (string) CID of the block. */
type BlockMap = Map<string, Uint8Array>;
76
+
77
+
/** Shape the decoded root block of the archive is treated as. */
interface Commit {
	version: 3;
	did: string;
	/** link to the tree node the record walk starts from */
	data: CBOR.CIDLink;
	rev: string;
	prev: CBOR.CIDLink | null;
	sig: CBOR.Bytes;
}
85
+
86
+
/** One entry of a tree node: a prefix-compressed key, its record link and an optional subtree. */
interface TreeEntry {
	/** count of bytes shared with previous TreeEntry in this Node (if any) */
	p: number;
	/** remainder of key for this TreeEntry, after "prefixlen" have been removed */
	k: CBOR.Bytes;
	/** link to the record data (CBOR) for this entry */
	v: CBOR.CIDLink;
	/** link to a sub-tree Node at a lower level which has keys sorting after this TreeEntry's key (to the "right"), but before the next TreeEntry's key in this Node (if any) */
	t: CBOR.CIDLink | null;
}
96
+
97
+
/** A decoded tree node: an optional left subtree plus an ordered list of entries. */
interface MstNode {
	/** link to sub-tree Node on a lower level and with all keys sorting before keys at this node */
	l: CBOR.CIDLink | null;
	/** ordered list of TreeEntry objects */
	e: TreeEntry[];
}
103
+
104
+
/** A tree entry after key decompression, as yielded by the tree walk. */
interface NodeEntry {
	/** full key, rebuilt from the shared prefix and the entry's own suffix */
	key: string;
	/** value link of the entry (its `v` field) */
	cid: CBOR.CIDLink;
}
+69
packages/utilities/car/lib/index.test.ts
+69
packages/utilities/car/lib/index.test.ts
···
1
+
import { expect, it } from 'bun:test';
2
+
3
+
import { iterateAtpCar } from './index.js';
4
+
5
+
it('decodes atproto car files', () => {
6
+
const buf = Buffer.from(
7
+
'OqJlcm9vdHOB2CpYJQABcRIgkD8I0DL+GsJ3OKREpf9k73yHguuSEYzEiXPGueoJg8FndmVy' +
8
+
'c2lvbgGPAQFxEiDqG8o/D37K3hldhQTMRq9/Uvyf7X9evn9eB9ZdgpYq6qRlJHR5cGV2YXBw' +
9
+
'LmJza3kuYWN0b3IucHJvZmlsZWljcmVhdGVkQXR4GDIwMjQtMDItMjRUMTI6MTU6NDEuMjE5' +
10
+
'WmtkZXNjcmlwdGlvbm90ZXN0aW5nIGFjY291bnRrZGlzcGxheU5hbWVg4AEBcRIgkD8I0DL+' +
11
+
'GsJ3OKREpf9k73yHguuSEYzEiXPGueoJg8GmY2RpZHggZGlkOnBsYzpzcmNxb3UybTd1cXVv' +
12
+
'Z3lkNXhrNGI1eTVjcmV2bTNsNXE1ZmplbnRjMmRjc2lnWEDeWWEO5/vV6SmnbUrLRu9WhWqI' +
13
+
'kHKANGFOin3xqFc4fgtuYzkbFXFJDMQU06nBWxict8FQ8Kas9Mr2fDAh++vVZGRhdGHYKlgl' +
14
+
'AAFxEiB2ibkpj3r4cdTag9v2ipIe8fxyjUFOgCjZbtYnfhyJ2GRwcmV29md2ZXJzaW9uA6QB' +
15
+
'AXESIHaJuSmPevhx1NqD2/aKkh7x/HKNQU6AKNlu1id+HInYomFlgaRha1gbYXBwLmJza3ku' +
16
+
'YWN0b3IucHJvZmlsZS9zZWxmYXAAYXTYKlglAAFxEiBvSJJSaF/w/fee+UmoLV84FDwZRC7p' +
17
+
'pJX484MghY0rM2F22CpYJQABcRIg6hvKPw9+yt4ZXYUEzEavf1L8n+1/Xr5/XgfWXYKWKuph' +
18
+
'bPaBAQFxEiBvSJJSaF/w/fee+UmoLV84FDwZRC7ppJX484MghY0rM6JhZYGkYWtYIGFwcC5i' +
19
+
'c2t5LmZlZWQucG9zdC8za201eW1rNGhoazJ6YXAAYXT2YXbYKlglAAFxEiDj+gU903L3F3Ar' +
20
+
'WCg+aeQZYEiM3ooIxqHbVvbQPZvEbGFs9qECAXESIOP6BT3TcvcXcCtYKD5p5BlgSIzeigjG' +
21
+
'odtW9tA9m8RspWR0ZXh0dWJlZXAgYm9vcCBAbWFyeS5teS5pZGUkdHlwZXJhcHAuYnNreS5m' +
22
+
'ZWVkLnBvc3RlbGFuZ3OBYmVuZmZhY2V0c4GjZSR0eXBld2FwcC5ic2t5LnJpY2h0ZXh0LmZh' +
23
+
'Y2V0ZWluZGV4omdieXRlRW5kFWlieXRlU3RhcnQKaGZlYXR1cmVzgaJjZGlkeCBkaWQ6cGxj' +
24
+
'OmlhNzZrdm5uZGp1dGdlZGdneDJpYnJlbWUkdHlwZXgfYXBwLmJza3kucmljaHRleHQuZmFj' +
25
+
'ZXQjbWVudGlvbmljcmVhdGVkQXR4GDIwMjQtMDItMjRUMTI6MTY6MjAuNjM3Wg==',
26
+
'base64',
27
+
);
28
+
29
+
// @ts-expect-error: the type checker rejects a Node.js Buffer for the Uint8Array parameter
30
+
const result = Array.from(iterateAtpCar(buf));
31
+
32
+
expect(result).toEqual([
33
+
{
34
+
collection: 'app.bsky.actor.profile',
35
+
rkey: 'self',
36
+
record: {
37
+
$type: 'app.bsky.actor.profile',
38
+
createdAt: '2024-02-24T12:15:41.219Z',
39
+
displayName: '',
40
+
description: 'testing account',
41
+
},
42
+
},
43
+
{
44
+
collection: 'app.bsky.feed.post',
45
+
rkey: '3km5ymk4hhk2z',
46
+
record: {
47
+
$type: 'app.bsky.feed.post',
48
+
createdAt: '2024-02-24T12:16:20.637Z',
49
+
langs: ['en'],
50
+
text: 'beep boop @mary.my.id',
51
+
facets: [
52
+
{
53
+
$type: 'app.bsky.richtext.facet',
54
+
index: {
55
+
byteEnd: 21,
56
+
byteStart: 10,
57
+
},
58
+
features: [
59
+
{
60
+
did: 'did:plc:ia76kvnndjutgedggx2ibrem',
61
+
$type: 'app.bsky.richtext.facet#mention',
62
+
},
63
+
],
64
+
},
65
+
],
66
+
},
67
+
},
68
+
]);
69
+
});
+2
packages/utilities/car/lib/index.ts
+2
packages/utilities/car/lib/index.ts
+7
packages/utilities/car/lib/reader.ts
+7
packages/utilities/car/lib/reader.ts
+35
packages/utilities/car/lib/utilities/byte-reader.ts
+35
packages/utilities/car/lib/utilities/byte-reader.ts
···
1
+
/** Synchronous cursor over an in-memory sequence of bytes. */
export interface SyncByteReader {
	/** current offset of the cursor */
	readonly pos: number;
	/** returns at most `size` bytes starting at the cursor, without moving it; fewer (or none) near the end */
	upto(size: number): Uint8Array;
	/** returns exactly `size` bytes starting at the cursor, advancing past them when `seek` is true */
	exactly(size: number, seek: boolean): Uint8Array;
	/** moves the cursor by `size` */
	seek(size: number): void;
}

/**
 * Creates a {@link SyncByteReader} over a byte array.
 *
 * Returned slices are `subarray` views that share memory with `buf`; nothing is copied.
 *
 * @param buf bytes to read from
 * @returns a reader whose cursor starts at offset 0
 */
export const createUint8Reader = (buf: Uint8Array): SyncByteReader => {
	let pos = 0;

	return {
		get pos() {
			return pos;
		},

		seek(size) {
			pos += size;
		},
		upto(size) {
			return buf.subarray(pos, pos + Math.min(size, buf.length - pos));
		},
		exactly(size, seek) {
			// a negative or fractional size would yield an empty/odd slice and, with `seek`,
			// drag the cursor backwards, so reject it before touching any state
			if (!Number.isInteger(size) || size < 0) {
				throw new RangeError(`invalid read size; size=${size}`);
			}

			if (size > buf.length - pos) {
				throw new RangeError('unexpected end of data');
			}

			const slice = buf.subarray(pos, pos + size);
			if (seek) {
				pos += size;
			}

			return slice;
		},
	};
};
+116
packages/utilities/car/lib/utilities/sync-car-reader.ts
+116
packages/utilities/car/lib/utilities/sync-car-reader.ts
···
1
+
import * as CBOR from '@atcute/cbor';
2
+
import * as varint from '@atcute/varint';
3
+
import * as CID from '@atcute/cid';
4
+
5
+
import type { SyncByteReader } from './byte-reader.js';
6
+
7
+
/** Decoded header of a version 1 CAR archive. */
interface CarV1Header {
	/** format version; only `1` is accepted */
	version: 1;
	/** root links declared by the archive */
	roots: CBOR.CIDLink[];
}
11
+
12
+
/**
 * Type guard for a decoded CAR v1 header: a non-null object whose `version` is `1` and whose
 * `roots` is an array made up solely of `CBOR.CIDLinkWrapper` instances.
 */
const isCarV1Header = (value: unknown): value is CarV1Header => {
	if (typeof value !== 'object' || value === null) {
		return false;
	}

	const candidate = value as CarV1Header;
	const version = candidate.version;
	const roots = candidate.roots;

	if (version !== 1) {
		return false;
	}

	if (!Array.isArray(roots)) {
		return false;
	}

	return roots.every((root) => root instanceof CBOR.CIDLinkWrapper);
};
20
+
21
+
/**
 * Decodes a varint at the cursor and advances the reader past it.
 *
 * @param reader byte source
 * @param size maximum number of bytes to peek at for the varint
 * @returns the decoded integer
 * @throws RangeError when no bytes are left to read
 */
const readVarint = (reader: SyncByteReader, size: number): number => {
	const peeked = reader.upto(size);
	if (peeked.length === 0) {
		throw new RangeError(`unexpected end of data`);
	}

	const decoded = varint.decode(peeked);
	const value = decoded[0];
	const consumed = decoded[1];

	// only skip the bytes the varint actually occupied, not the whole peeked window
	reader.seek(consumed);

	return value;
};
32
+
33
+
/**
 * Reads the length-prefixed, CBOR-encoded archive header at the cursor.
 *
 * @param reader byte source positioned at the start of the archive
 * @returns the validated v1 header
 * @throws RangeError when the declared header length is 0
 * @throws TypeError when the decoded header is not a CAR v1 header
 */
const readHeader = (reader: SyncByteReader): CarV1Header => {
	const headerLength = readVarint(reader, 8);
	if (headerLength === 0) {
		throw new RangeError(`invalid car header; length=0`);
	}

	const decoded = CBOR.decode(reader.exactly(headerLength, true));
	if (isCarV1Header(decoded)) {
		return decoded;
	}

	throw new TypeError(`expected a car v1 archive`);
};
47
+
48
+
/**
 * Reads a multihash at the cursor: a varint hash code, a varint digest size, then the digest.
 *
 * @param reader byte source positioned at the multihash
 * @returns the code, size, digest view, and the full multihash bytes (prefix included)
 */
const readMultihashDigest = (reader: SyncByteReader): CID.Digest => {
	// peek only; the cursor is advanced once, over prefix and digest together
	const peeked = reader.upto(8);

	const [code, codeLength] = varint.decode(peeked);
	const [size, sizeLength] = varint.decode(peeked.subarray(codeLength));
	const prefixLength = codeLength + sizeLength;

	const bytes = reader.exactly(prefixLength + size, true);

	return {
		code,
		size,
		digest: bytes.subarray(prefixLength),
		bytes,
	};
};
66
+
67
+
/**
 * Reads a CID at the cursor: varint version, varint codec, then a multihash.
 *
 * @param reader byte source positioned at the CID
 * @returns the parsed CID, with `bytes` re-encoded from its parts
 * @throws Error when the version is anything other than 1
 */
const readCid = (reader: SyncByteReader): CID.CID => {
	const version = readVarint(reader, 8);
	if (version !== 1) {
		throw new Error(`expected a cidv1`);
	}

	const codec = readVarint(reader, 8);
	const digest = readMultihashDigest(reader);

	return {
		version,
		code: codec,
		digest,
		bytes: CID.encode(version, codec, digest.bytes),
	};
};
85
+
86
+
/**
 * Reads the head of an archive section: the varint section length followed by the block's CID.
 *
 * The declared length covers the CID and the block data together, so the data size is what
 * remains of it once the CID has been consumed.
 *
 * @param reader byte source positioned at the start of a section
 * @returns the block's CID and the number of data bytes that follow it
 * @throws Error when the declared length is 0, or is too small to contain the CID
 */
const readBlockHeader = (reader: SyncByteReader): { cid: CID.CID; blockSize: number } => {
	const length = readVarint(reader, 8);
	if (length === 0) {
		throw new Error(`invalid car section; length=0`);
	}

	const cidStart = reader.pos;
	const cid = readCid(reader);
	const blockSize = length - (reader.pos - cidStart);

	// a section shorter than its own CID is malformed; letting the negative size through
	// would make the subsequent read step backwards in the archive
	if (blockSize < 0) {
		throw new Error(`invalid car section; cid exceeds section length`);
	}

	return { cid, blockSize };
};
101
+
102
+
/**
 * Creates a CAR archive reader on top of a byte reader.
 *
 * The header is consumed immediately; blocks are read lazily as `iterate()` is advanced.
 *
 * @param reader byte source positioned at the start of the archive
 * @returns the archive's root links and a generator over its `{ cid, bytes }` blocks
 */
export const createCarReader = (reader: SyncByteReader) => {
	const header = readHeader(reader);

	return {
		roots: header.roots,
		*iterate(): Generator<{ cid: CID.CID; bytes: Uint8Array }> {
			for (;;) {
				// stop once the byte source has nothing left
				if (reader.upto(8).length === 0) {
					return;
				}

				const section = readBlockHeader(reader);
				const bytes = reader.exactly(section.blockSize, true);

				yield { cid: section.cid, bytes };
			}
		},
	};
};
+30
packages/utilities/car/package.json
+30
packages/utilities/car/package.json
···
1
+
{
2
+
"type": "module",
3
+
"name": "@atcute/car",
4
+
"version": "1.0.0",
5
+
"description": "read AT Protocol's CAR (content-addressable archive) repositories",
6
+
"license": "MIT",
7
+
"repository": {
8
+
"url": "https://codeberg.org/mary-ext/atcute"
9
+
},
10
+
"files": [
11
+
"dist/"
12
+
],
13
+
"exports": {
14
+
".": "./dist/index.js"
15
+
},
16
+
"sideEffects": false,
17
+
"scripts": {
18
+
"build": "tsc --project tsconfig.build.json",
19
+
"test": "bun test --coverage",
20
+
"prepublish": "rm -rf dist; pnpm run build"
21
+
},
22
+
"devDependencies": {
23
+
"@types/bun": "^1.1.10"
24
+
},
25
+
"dependencies": {
26
+
"@atcute/cbor": "workspace:^",
27
+
"@atcute/cid": "workspace:^",
28
+
"@atcute/varint": "workspace:^"
29
+
}
30
+
}
+4
packages/utilities/car/tsconfig.build.json
+4
packages/utilities/car/tsconfig.build.json
+23
packages/utilities/car/tsconfig.json
+23
packages/utilities/car/tsconfig.json
···
1
+
{
2
+
"compilerOptions": {
3
+
"types": ["bun"],
4
+
"outDir": "dist/",
5
+
"esModuleInterop": true,
6
+
"skipLibCheck": true,
7
+
"target": "ESNext",
8
+
"allowJs": true,
9
+
"resolveJsonModule": true,
10
+
"moduleDetection": "force",
11
+
"isolatedModules": true,
12
+
"verbatimModuleSyntax": true,
13
+
"strict": true,
14
+
"noImplicitOverride": true,
15
+
"noUnusedLocals": true,
16
+
"noUnusedParameters": true,
17
+
"noFallthroughCasesInSwitch": true,
18
+
"module": "NodeNext",
19
+
"sourceMap": true,
20
+
"declaration": true,
21
+
},
22
+
"include": ["lib"],
23
+
}
+16
pnpm-lock.yaml
+16
pnpm-lock.yaml
···
161
161
specifier: ^1.1.10
162
162
version: 1.1.10
163
163
164
+
packages/utilities/car:
165
+
dependencies:
166
+
'@atcute/cbor':
167
+
specifier: workspace:^
168
+
version: link:../cbor
169
+
'@atcute/cid':
170
+
specifier: workspace:^
171
+
version: link:../cid
172
+
'@atcute/varint':
173
+
specifier: workspace:^
174
+
version: link:../varint
175
+
devDependencies:
176
+
'@types/bun':
177
+
specifier: ^1.1.10
178
+
version: 1.1.10
179
+
164
180
packages/utilities/cbor:
165
181
dependencies:
166
182
'@atcute/base32':