+1
-1
diesel.toml
+1
-1
diesel.toml
-18
fk.patch
-18
fk.patch
···
1
-
diff --git a/parakeet-db/src/schema.rs b/parakeet-db/src/schema.rs
2
-
index 59f65d9..a4219d5 100644
3
-
--- a/parakeet-db/src/schema.rs
4
-
+++ b/parakeet-db/src/schema.rs
5
-
@@ -364,11 +364,13 @@ diesel::joinable!(post_embed_images -> posts (post_uri));
6
-
diesel::joinable!(post_embed_record -> posts (post_uri));
7
-
diesel::joinable!(post_embed_video -> posts (post_uri));
8
-
diesel::joinable!(post_embed_video_captions -> posts (post_uri));
9
-
+diesel::joinable!(postgates -> posts (post_uri));
10
-
diesel::joinable!(posts -> actors (did));
11
-
diesel::joinable!(profiles -> actors (did));
12
-
diesel::joinable!(reposts -> actors (did));
13
-
diesel::joinable!(starterpacks -> actors (owner));
14
-
diesel::joinable!(statuses -> actors (did));
15
-
+diesel::joinable!(threadgates -> posts (post_uri));
16
-
diesel::joinable!(verification -> actors (verifier));
17
-
18
-
diesel::allow_tables_to_appear_in_same_query!(
-10
migrations/2025-10-29-164300_drop_records_table/down.sql
-10
migrations/2025-10-29-164300_drop_records_table/down.sql
···
1
-
-- Recreate the generic records table (if rollback is needed)
2
-
CREATE TABLE records (
3
-
at_uri TEXT PRIMARY KEY,
4
-
did TEXT NOT NULL,
5
-
cid BYTEA NOT NULL,
6
-
created_at TIMESTAMP NOT NULL DEFAULT NOW()
7
-
);
8
-
9
-
CREATE INDEX idx_records_did ON records(did);
10
-
CREATE INDEX idx_records_created_at ON records(created_at);
-3
migrations/2025-10-29-164300_drop_records_table/up.sql
-3
migrations/2025-10-29-164300_drop_records_table/up.sql
+9
migrations/2025-10-30-020000_optimize_records_table/down.sql
+9
migrations/2025-10-30-020000_optimize_records_table/down.sql
···
1
+
-- Rollback: This migration cannot be rolled back safely
2
+
--
3
+
-- The old tables are dropped as part of the forward migration, so rollback
4
+
-- would result in complete data loss.
5
+
--
6
+
-- If you need to rollback, you must restore from a backup taken before
7
+
-- running the migration.
8
+
9
+
SELECT 'ERROR: This migration cannot be rolled back. Restore from backup instead.' as error;
+440
migrations/2025-10-30-020000_optimize_records_table/up.sql
+440
migrations/2025-10-30-020000_optimize_records_table/up.sql
···
1
+
-- Comprehensive storage optimization: Normalize DIDs, optimize CIDs, use FKs to records
2
+
--
3
+
-- This migration optimizes ALL major tables in one pass:
4
+
-- - records: Remove at_uri, use actor_id + collection ENUM + rkey
5
+
-- - likes: Replace did + subject + subject_cid with actor_id + subject_record_id
6
+
-- - reposts: Replace did + post + post_cid with actor_id + post_record_id
7
+
-- - posts: Replace did + parent/root URIs with actor_id + parent/root_record_id, strip CID headers
8
+
-- - follows: Replace did with actor_id
9
+
-- - blocks: Replace did with actor_id
10
+
-- - notifications: Replace DIDs with actor_ids, strip CID header
11
+
--
12
+
-- Total storage savings: ~6-8 GB (17-22% reduction)
13
+
14
+
-- Step 1: Ensure actors table has id column
15
+
DO $$
16
+
BEGIN
17
+
IF NOT EXISTS (SELECT 1 FROM information_schema.columns
18
+
WHERE table_name = 'actors' AND column_name = 'id') THEN
19
+
ALTER TABLE actors ADD COLUMN id SERIAL UNIQUE;
20
+
END IF;
21
+
END $$;
22
+
23
+
-- Step 2: Create record_type ENUM
24
+
-- Extract all unique collection types from existing records
25
+
DO $$
26
+
DECLARE
27
+
collection_types TEXT[];
28
+
BEGIN
29
+
-- Get all unique collection types
30
+
SELECT array_agg(DISTINCT SPLIT_PART(at_uri, '/', 4) ORDER BY SPLIT_PART(at_uri, '/', 4))
31
+
INTO collection_types
32
+
FROM records
33
+
WHERE at_uri IS NOT NULL;
34
+
35
+
-- Create enum type if it doesn't exist
36
+
IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'record_type') THEN
37
+
EXECUTE format('CREATE TYPE record_type AS ENUM (%s)',
38
+
(SELECT string_agg(quote_literal(t), ', ') FROM unnest(collection_types) t)
39
+
);
40
+
END IF;
41
+
END $$;
42
+
43
+
-- Step 3: Create new optimized records table
44
+
CREATE TABLE records_new (
45
+
id BIGSERIAL PRIMARY KEY,
46
+
actor_id INTEGER NOT NULL,
47
+
collection record_type NOT NULL, -- PostgreSQL ENUM (4 bytes)
48
+
rkey TEXT NOT NULL,
49
+
cid BYTEA NOT NULL, -- 32 bytes (header stripped), not 36
50
+
indexed_at TIMESTAMP NOT NULL
51
+
);
52
+
53
+
-- Step 4: Copy and transform data in a single pass
54
+
-- This is the expensive operation but happens once
55
+
INSERT INTO records_new (actor_id, collection, rkey, cid, indexed_at)
56
+
SELECT
57
+
a.id,
58
+
SPLIT_PART(r.at_uri, '/', 4)::record_type as collection,
59
+
SPLIT_PART(r.at_uri, '/', 5) as rkey,
60
+
-- Strip 4-byte CID header (0x01711220), keep only 32-byte digest
61
+
substring(r.cid from 5) as cid_digest,
62
+
r.indexed_at
63
+
FROM records r
64
+
INNER JOIN actors a ON r.did = a.did
65
+
WHERE a.id IS NOT NULL; -- Only migrate records where actor has ID
66
+
67
+
-- Step 5: Create indexes on new table
68
+
CREATE UNIQUE INDEX idx_records_new_composite ON records_new(actor_id, collection, rkey);
69
+
CREATE INDEX idx_records_new_actor_id ON records_new(actor_id);
70
+
CREATE INDEX idx_records_new_collection ON records_new(collection);
71
+
CREATE INDEX idx_records_new_indexed_at ON records_new(indexed_at);
72
+
73
+
-- Step 6: Add foreign key constraint
74
+
ALTER TABLE records_new
75
+
ADD CONSTRAINT fk_records_actor
76
+
FOREIGN KEY (actor_id) REFERENCES actors(id) ON DELETE CASCADE;
77
+
78
+
-- Step 7: Swap tables (brief exclusive lock)
79
+
ALTER TABLE records RENAME TO records_old;
80
+
ALTER TABLE records_new RENAME TO records;
81
+
82
+
-- Step 8: Update actors primary key to use id instead of did
83
+
ALTER TABLE actors DROP CONSTRAINT IF EXISTS actors_pkey CASCADE;
84
+
ALTER TABLE actors ADD PRIMARY KEY (id);
85
+
CREATE UNIQUE INDEX idx_actors_did ON actors(did);
86
+
87
+
-- Step 8a: Drop old records table immediately
88
+
DROP TABLE records_old;
89
+
90
+
--------------------------------------------------------------------------------
91
+
-- LIKES TABLE OPTIMIZATION
92
+
--------------------------------------------------------------------------------
93
+
94
+
-- Step 9: Create optimized likes table
95
+
CREATE TABLE likes_new (
96
+
id BIGSERIAL PRIMARY KEY,
97
+
rkey TEXT NOT NULL,
98
+
actor_id INTEGER NOT NULL,
99
+
subject_record_id BIGINT NOT NULL,
100
+
created_at TIMESTAMPTZ NOT NULL,
101
+
indexed_at TIMESTAMP NOT NULL,
102
+
via_record_id BIGINT -- NULL if no via
103
+
);
104
+
105
+
-- Step 10: Transform likes data
106
+
-- Strategy: Parse subject URI, join to records via composite key
107
+
INSERT INTO likes_new (rkey, actor_id, subject_record_id, created_at, indexed_at, via_record_id)
108
+
SELECT
109
+
l.rkey,
110
+
a.id,
111
+
r_subject.id,
112
+
l.created_at,
113
+
l.indexed_at,
114
+
r_via.id
115
+
FROM likes l
116
+
INNER JOIN actors a ON l.did = a.did
117
+
-- Parse subject URI and find matching record
118
+
INNER JOIN actors actors_subj ON SPLIT_PART(l.subject, '/', 3) = actors_subj.did
119
+
INNER JOIN records r_subject ON
120
+
r_subject.actor_id = actors_subj.id
121
+
AND r_subject.collection::text = SPLIT_PART(l.subject, '/', 4)
122
+
AND r_subject.rkey = SPLIT_PART(l.subject, '/', 5)
123
+
-- Parse via_uri and find matching record if exists
124
+
LEFT JOIN actors actors_via ON l.via_uri IS NOT NULL AND SPLIT_PART(l.via_uri, '/', 3) = actors_via.did
125
+
LEFT JOIN records r_via ON
126
+
r_via.actor_id = actors_via.id
127
+
AND r_via.collection::text = SPLIT_PART(l.via_uri, '/', 4)
128
+
AND r_via.rkey = SPLIT_PART(l.via_uri, '/', 5)
129
+
WHERE a.id IS NOT NULL;
130
+
131
+
-- Step 11: Create indexes on likes_new
132
+
CREATE UNIQUE INDEX idx_likes_new_actor_rkey ON likes_new(actor_id, rkey);
133
+
CREATE INDEX idx_likes_new_subject ON likes_new(subject_record_id);
134
+
CREATE INDEX idx_likes_new_indexed_at ON likes_new(indexed_at);
135
+
CREATE INDEX idx_likes_new_via ON likes_new(via_record_id) WHERE via_record_id IS NOT NULL;
136
+
137
+
-- Step 12: Add foreign key constraints for likes
138
+
ALTER TABLE likes_new
139
+
ADD CONSTRAINT fk_likes_actor FOREIGN KEY (actor_id) REFERENCES actors(id) ON DELETE CASCADE,
140
+
ADD CONSTRAINT fk_likes_subject FOREIGN KEY (subject_record_id) REFERENCES records(id) ON DELETE CASCADE,
141
+
ADD CONSTRAINT fk_likes_via FOREIGN KEY (via_record_id) REFERENCES records(id) ON DELETE SET NULL;
142
+
143
+
-- Step 13: Swap likes tables
144
+
ALTER TABLE likes RENAME TO likes_old;
145
+
ALTER TABLE likes_new RENAME TO likes;
146
+
147
+
-- Step 13a: Drop old likes table immediately
148
+
DROP TABLE likes_old;
149
+
150
+
--------------------------------------------------------------------------------
151
+
-- REPOSTS TABLE OPTIMIZATION
152
+
--------------------------------------------------------------------------------
153
+
154
+
-- Step 14: Create optimized reposts table
155
+
CREATE TABLE reposts_new (
156
+
id BIGSERIAL PRIMARY KEY,
157
+
rkey TEXT NOT NULL,
158
+
actor_id INTEGER NOT NULL,
159
+
post_record_id BIGINT NOT NULL,
160
+
created_at TIMESTAMPTZ NOT NULL,
161
+
indexed_at TIMESTAMP NOT NULL,
162
+
via_record_id BIGINT -- NULL if no via
163
+
);
164
+
165
+
-- Step 15: Transform reposts data
166
+
INSERT INTO reposts_new (rkey, actor_id, post_record_id, created_at, indexed_at, via_record_id)
167
+
SELECT
168
+
rp.rkey,
169
+
a.id,
170
+
r_post.id,
171
+
rp.created_at,
172
+
rp.indexed_at,
173
+
r_via.id
174
+
FROM reposts rp
175
+
INNER JOIN actors a ON rp.did = a.did
176
+
-- Parse post URI and find matching record
177
+
INNER JOIN actors actors_post ON SPLIT_PART(rp.post, '/', 3) = actors_post.did
178
+
INNER JOIN records r_post ON
179
+
r_post.actor_id = actors_post.id
180
+
AND r_post.collection::text = SPLIT_PART(rp.post, '/', 4)
181
+
AND r_post.rkey = SPLIT_PART(rp.post, '/', 5)
182
+
-- Parse via_uri and find matching record if exists
183
+
LEFT JOIN actors actors_via ON rp.via_uri IS NOT NULL AND SPLIT_PART(rp.via_uri, '/', 3) = actors_via.did
184
+
LEFT JOIN records r_via ON
185
+
r_via.actor_id = actors_via.id
186
+
AND r_via.collection::text = SPLIT_PART(rp.via_uri, '/', 4)
187
+
AND r_via.rkey = SPLIT_PART(rp.via_uri, '/', 5)
188
+
WHERE a.id IS NOT NULL;
189
+
190
+
-- Step 16: Create indexes on reposts_new
191
+
CREATE UNIQUE INDEX idx_reposts_new_actor_rkey ON reposts_new(actor_id, rkey);
192
+
CREATE INDEX idx_reposts_new_post ON reposts_new(post_record_id);
193
+
CREATE INDEX idx_reposts_new_indexed_at ON reposts_new(indexed_at);
194
+
CREATE INDEX idx_reposts_new_via ON reposts_new(via_record_id) WHERE via_record_id IS NOT NULL;
195
+
196
+
-- Step 17: Add foreign key constraints for reposts
197
+
ALTER TABLE reposts_new
198
+
ADD CONSTRAINT fk_reposts_actor FOREIGN KEY (actor_id) REFERENCES actors(id) ON DELETE CASCADE,
199
+
ADD CONSTRAINT fk_reposts_post FOREIGN KEY (post_record_id) REFERENCES records(id) ON DELETE CASCADE,
200
+
ADD CONSTRAINT fk_reposts_via FOREIGN KEY (via_record_id) REFERENCES records(id) ON DELETE SET NULL;
201
+
202
+
-- Step 18: Swap reposts tables
203
+
ALTER TABLE reposts RENAME TO reposts_old;
204
+
ALTER TABLE reposts_new RENAME TO reposts;
205
+
206
+
-- Step 18a: Drop old reposts table immediately
207
+
DROP TABLE reposts_old;
208
+
209
+
--------------------------------------------------------------------------------
210
+
-- POSTS TABLE OPTIMIZATION
211
+
--------------------------------------------------------------------------------
212
+
213
+
-- Step 19: Create optimized posts table
214
+
CREATE TABLE posts_new (
215
+
id BIGSERIAL PRIMARY KEY,
216
+
at_uri TEXT NOT NULL UNIQUE, -- Keep at_uri as it's referenced by many tables
217
+
cid_digest BYTEA NOT NULL, -- Strip header, 32 bytes
218
+
actor_id INTEGER NOT NULL,
219
+
record JSONB NOT NULL,
220
+
221
+
content TEXT NOT NULL,
222
+
facets JSONB,
223
+
languages TEXT[] NOT NULL,
224
+
tags TEXT[] NOT NULL,
225
+
226
+
parent_record_id BIGINT, -- FK to records instead of URI+CID
227
+
root_record_id BIGINT, -- FK to records instead of URI+CID
228
+
229
+
embed TEXT,
230
+
embed_subtype TEXT,
231
+
232
+
mentions TEXT[],
233
+
violates_threadgate BOOLEAN NOT NULL DEFAULT false,
234
+
235
+
created_at TIMESTAMPTZ NOT NULL,
236
+
indexed_at TIMESTAMP NOT NULL
237
+
);
238
+
239
+
-- Step 20: Transform posts data
240
+
-- Note: posts.cid is TEXT (base32), we need to convert to binary digest
241
+
INSERT INTO posts_new (at_uri, cid_digest, actor_id, record, content, facets, languages, tags,
242
+
parent_record_id, root_record_id, embed, embed_subtype, mentions,
243
+
violates_threadgate, created_at, indexed_at)
244
+
SELECT
245
+
p.at_uri,
246
+
decode(substring(p.cid from 9), 'hex'), -- Strip text CID header, store binary digest
247
+
a.id,
248
+
p.record,
249
+
p.content,
250
+
p.facets,
251
+
p.languages,
252
+
p.tags,
253
+
r_parent.id,
254
+
r_root.id,
255
+
p.embed,
256
+
p.embed_subtype,
257
+
p.mentions,
258
+
p.violates_threadgate,
259
+
p.created_at,
260
+
p.indexed_at
261
+
FROM posts p
262
+
INNER JOIN actors a ON p.did = a.did
263
+
-- Parse parent_uri and find matching record if exists
264
+
LEFT JOIN actors actors_parent ON p.parent_uri IS NOT NULL AND SPLIT_PART(p.parent_uri, '/', 3) = actors_parent.did
265
+
LEFT JOIN records r_parent ON
266
+
r_parent.actor_id = actors_parent.id
267
+
AND r_parent.collection::text = SPLIT_PART(p.parent_uri, '/', 4)
268
+
AND r_parent.rkey = SPLIT_PART(p.parent_uri, '/', 5)
269
+
-- Parse root_uri and find matching record if exists
270
+
LEFT JOIN actors actors_root ON p.root_uri IS NOT NULL AND SPLIT_PART(p.root_uri, '/', 3) = actors_root.did
271
+
LEFT JOIN records r_root ON
272
+
r_root.actor_id = actors_root.id
273
+
AND r_root.collection::text = SPLIT_PART(p.root_uri, '/', 4)
274
+
AND r_root.rkey = SPLIT_PART(p.root_uri, '/', 5)
275
+
WHERE a.id IS NOT NULL;
276
+
277
+
-- Step 21: Create indexes on posts_new
278
+
CREATE INDEX idx_posts_new_actor ON posts_new(actor_id);
279
+
CREATE INDEX idx_posts_new_created_at ON posts_new(created_at);
280
+
CREATE INDEX idx_posts_new_indexed_at ON posts_new(indexed_at);
281
+
CREATE INDEX idx_posts_new_parent ON posts_new(parent_record_id) WHERE parent_record_id IS NOT NULL;
282
+
CREATE INDEX idx_posts_new_root ON posts_new(root_record_id) WHERE root_record_id IS NOT NULL;
283
+
284
+
-- Step 22: Add foreign key constraints for posts
285
+
ALTER TABLE posts_new
286
+
ADD CONSTRAINT fk_posts_actor FOREIGN KEY (actor_id) REFERENCES actors(id) ON DELETE CASCADE,
287
+
ADD CONSTRAINT fk_posts_parent FOREIGN KEY (parent_record_id) REFERENCES records(id) ON DELETE SET NULL,
288
+
ADD CONSTRAINT fk_posts_root FOREIGN KEY (root_record_id) REFERENCES records(id) ON DELETE SET NULL;
289
+
290
+
-- Step 23: Swap posts tables
291
+
ALTER TABLE posts RENAME TO posts_old;
292
+
ALTER TABLE posts_new RENAME TO posts;
293
+
294
+
-- Step 23a: Drop old posts table immediately
295
+
DROP TABLE posts_old;
296
+
297
+
--------------------------------------------------------------------------------
298
+
-- FOLLOWS TABLE OPTIMIZATION
299
+
--------------------------------------------------------------------------------
300
+
301
+
-- Step 24: Create optimized follows table
302
+
CREATE TABLE follows_new (
303
+
id BIGSERIAL PRIMARY KEY,
304
+
rkey TEXT NOT NULL,
305
+
actor_id INTEGER NOT NULL,
306
+
subject_actor_id INTEGER NOT NULL,
307
+
created_at TIMESTAMPTZ NOT NULL,
308
+
UNIQUE(actor_id, rkey)
309
+
);
310
+
311
+
-- Step 25: Transform follows data
312
+
INSERT INTO follows_new (rkey, actor_id, subject_actor_id, created_at)
313
+
SELECT
314
+
f.rkey,
315
+
a.id,
316
+
a_subject.id,
317
+
f.created_at
318
+
FROM follows f
319
+
INNER JOIN actors a ON f.did = a.did
320
+
INNER JOIN actors a_subject ON f.subject = a_subject.did
321
+
WHERE a.id IS NOT NULL AND a_subject.id IS NOT NULL;
322
+
323
+
-- Step 26: Create indexes on follows_new
324
+
CREATE INDEX idx_follows_new_subject ON follows_new(subject_actor_id);
325
+
CREATE INDEX idx_follows_new_created_at ON follows_new(created_at);
326
+
327
+
-- Step 27: Add foreign key constraints for follows
328
+
ALTER TABLE follows_new
329
+
ADD CONSTRAINT fk_follows_actor FOREIGN KEY (actor_id) REFERENCES actors(id) ON DELETE CASCADE,
330
+
ADD CONSTRAINT fk_follows_subject FOREIGN KEY (subject_actor_id) REFERENCES actors(id) ON DELETE CASCADE;
331
+
332
+
-- Step 28: Swap follows tables
333
+
ALTER TABLE follows RENAME TO follows_old;
334
+
ALTER TABLE follows_new RENAME TO follows;
335
+
336
+
-- Step 28a: Drop old follows table immediately
337
+
DROP TABLE follows_old;
338
+
339
+
--------------------------------------------------------------------------------
340
+
-- BLOCKS TABLE OPTIMIZATION
341
+
--------------------------------------------------------------------------------
342
+
343
+
-- Step 29: Create optimized blocks table
344
+
CREATE TABLE blocks_new (
345
+
id BIGSERIAL PRIMARY KEY,
346
+
rkey TEXT NOT NULL,
347
+
actor_id INTEGER NOT NULL,
348
+
subject_actor_id INTEGER NOT NULL,
349
+
created_at TIMESTAMPTZ NOT NULL,
350
+
UNIQUE(actor_id, rkey)
351
+
);
352
+
353
+
-- Step 30: Transform blocks data
354
+
INSERT INTO blocks_new (rkey, actor_id, subject_actor_id, created_at)
355
+
SELECT
356
+
b.rkey,
357
+
a.id,
358
+
a_subject.id,
359
+
b.created_at
360
+
FROM blocks b
361
+
INNER JOIN actors a ON b.did = a.did
362
+
INNER JOIN actors a_subject ON b.subject = a_subject.did
363
+
WHERE a.id IS NOT NULL AND a_subject.id IS NOT NULL;
364
+
365
+
-- Step 31: Create indexes on blocks_new
366
+
CREATE INDEX idx_blocks_new_subject ON blocks_new(subject_actor_id);
367
+
CREATE INDEX idx_blocks_new_created_at ON blocks_new(created_at);
368
+
369
+
-- Step 32: Add foreign key constraints for blocks
370
+
ALTER TABLE blocks_new
371
+
ADD CONSTRAINT fk_blocks_actor FOREIGN KEY (actor_id) REFERENCES actors(id) ON DELETE CASCADE,
372
+
ADD CONSTRAINT fk_blocks_subject FOREIGN KEY (subject_actor_id) REFERENCES actors(id) ON DELETE CASCADE;
373
+
374
+
-- Step 33: Swap blocks tables
375
+
ALTER TABLE blocks RENAME TO blocks_old;
376
+
ALTER TABLE blocks_new RENAME TO blocks;
377
+
378
+
-- Step 33a: Drop old blocks table immediately
379
+
DROP TABLE blocks_old;
380
+
381
+
--------------------------------------------------------------------------------
382
+
-- NOTIFICATIONS TABLE OPTIMIZATION
383
+
--------------------------------------------------------------------------------
384
+
385
+
-- Step 34: Create optimized notifications table
386
+
CREATE TABLE notifications_new (
387
+
id BIGSERIAL PRIMARY KEY,
388
+
record_id BIGINT NOT NULL, -- FK to records instead of URI
389
+
recipient_actor_id INTEGER NOT NULL,
390
+
author_actor_id INTEGER NOT NULL,
391
+
reason TEXT NOT NULL,
392
+
reason_subject TEXT,
393
+
cid_digest BYTEA NOT NULL, -- Strip header, 32 bytes
394
+
is_read BOOLEAN NOT NULL DEFAULT false,
395
+
indexed_at TIMESTAMP NOT NULL,
396
+
created_at TIMESTAMP NOT NULL
397
+
);
398
+
399
+
-- Step 35: Transform notifications data
400
+
INSERT INTO notifications_new (record_id, recipient_actor_id, author_actor_id, reason,
401
+
reason_subject, cid_digest, is_read, indexed_at, created_at)
402
+
SELECT
403
+
r.id,
404
+
a_recipient.id,
405
+
a_author.id,
406
+
n.reason,
407
+
n.reason_subject,
408
+
decode(substring(n.cid from 9), 'hex'), -- Strip text CID header
409
+
n.is_read,
410
+
n.indexed_at,
411
+
n.created_at
412
+
FROM notifications n
413
+
INNER JOIN actors a_recipient ON n.recipient_did = a_recipient.did
414
+
INNER JOIN actors a_author ON n.author_did = a_author.did
415
+
-- Parse notification URI and find matching record
416
+
INNER JOIN actors actors_record ON SPLIT_PART(n.uri, '/', 3) = actors_record.did
417
+
INNER JOIN records r ON
418
+
r.actor_id = actors_record.id
419
+
AND r.collection::text = SPLIT_PART(n.uri, '/', 4)
420
+
AND r.rkey = SPLIT_PART(n.uri, '/', 5)
421
+
WHERE a_recipient.id IS NOT NULL AND a_author.id IS NOT NULL;
422
+
423
+
-- Step 36: Create indexes on notifications_new
424
+
CREATE INDEX idx_notifications_new_recipient ON notifications_new(recipient_actor_id);
425
+
CREATE INDEX idx_notifications_new_author ON notifications_new(author_actor_id);
426
+
CREATE INDEX idx_notifications_new_record ON notifications_new(record_id);
427
+
CREATE INDEX idx_notifications_new_indexed_at ON notifications_new(indexed_at);
428
+
429
+
-- Step 37: Add foreign key constraints for notifications
430
+
ALTER TABLE notifications_new
431
+
ADD CONSTRAINT fk_notifications_recipient FOREIGN KEY (recipient_actor_id) REFERENCES actors(id) ON DELETE CASCADE,
432
+
ADD CONSTRAINT fk_notifications_author FOREIGN KEY (author_actor_id) REFERENCES actors(id) ON DELETE CASCADE,
433
+
ADD CONSTRAINT fk_notifications_record FOREIGN KEY (record_id) REFERENCES records(id) ON DELETE CASCADE;
434
+
435
+
-- Step 38: Swap notifications tables
436
+
ALTER TABLE notifications RENAME TO notifications_old;
437
+
ALTER TABLE notifications_new RENAME TO notifications;
438
+
439
+
-- Step 38a: Drop old notifications table immediately
440
+
DROP TABLE notifications_old;
+91
-12
parakeet-db/src/models.rs
+91
-12
parakeet-db/src/models.rs
···
5
5
6
6
#[derive(Debug, Queryable, Selectable, Identifiable)]
7
7
#[diesel(table_name = crate::schema::actors)]
8
-
#[diesel(primary_key(did))]
8
+
#[diesel(primary_key(id))]
9
9
#[diesel(check_for_backend(diesel::pg::Pg))]
10
10
pub struct Actor {
11
11
pub did: String,
···
15
15
pub repo_rev: Option<String>,
16
16
pub repo_cid: Option<String>,
17
17
pub last_indexed: Option<NaiveDateTime>,
18
+
pub id: i32,
18
19
}
19
20
20
21
#[derive(Clone, Debug, Serialize, Deserialize, Queryable, Selectable, Identifiable)]
···
114
115
115
116
#[derive(Clone, Debug, Serialize, Deserialize, Queryable, Selectable, Identifiable)]
116
117
#[diesel(table_name = crate::schema::posts)]
117
-
#[diesel(primary_key(at_uri))]
118
+
#[diesel(primary_key(id))]
118
119
#[diesel(check_for_backend(diesel::pg::Pg))]
119
120
pub struct Post {
121
+
pub id: i64,
120
122
pub at_uri: String,
121
-
pub cid: String,
122
-
pub did: String,
123
+
pub cid_digest: Vec<u8>,
124
+
pub actor_id: i32,
123
125
pub record: serde_json::Value,
124
126
125
127
pub content: String,
···
127
129
pub languages: not_null_vec::TextArray,
128
130
pub tags: not_null_vec::TextArray,
129
131
130
-
pub parent_uri: Option<String>,
131
-
pub parent_cid: Option<String>,
132
-
pub root_uri: Option<String>,
133
-
pub root_cid: Option<String>,
132
+
pub parent_record_id: Option<i64>,
133
+
pub root_record_id: Option<i64>,
134
134
135
135
pub embed: Option<String>,
136
136
pub embed_subtype: Option<String>,
···
347
347
#[diesel(check_for_backend(diesel::pg::Pg))]
348
348
pub struct Notification {
349
349
pub id: i64,
350
-
pub uri: String,
351
-
pub recipient_did: String,
352
-
pub author_did: String,
350
+
pub record_id: i64,
351
+
pub recipient_actor_id: i32,
352
+
pub author_actor_id: i32,
353
353
pub reason: String,
354
354
pub reason_subject: Option<String>,
355
-
pub cid: String,
355
+
pub cid_digest: Vec<u8>,
356
356
pub is_read: bool,
357
357
pub indexed_at: NaiveDateTime,
358
358
pub created_at: NaiveDateTime,
···
440
440
pub did: String,
441
441
pub typ: String,
442
442
pub sort_at: DateTime<Utc>,
443
+
}
444
+
445
+
// Optimized table models
446
+
447
+
// Wrapper type for RecordType to allow String deserialization
448
+
#[derive(Debug, Clone, diesel::expression::AsExpression, diesel::deserialize::FromSqlRow)]
449
+
#[diesel(sql_type = crate::schema::sql_types::RecordType)]
450
+
pub struct RecordTypeWrapper(pub String);
451
+
452
+
impl diesel::deserialize::FromSql<crate::schema::sql_types::RecordType, diesel::pg::Pg> for RecordTypeWrapper {
453
+
fn from_sql(bytes: diesel::pg::PgValue) -> diesel::deserialize::Result<Self> {
454
+
let s = <String as diesel::deserialize::FromSql<diesel::sql_types::Text, diesel::pg::Pg>>::from_sql(bytes)?;
455
+
Ok(RecordTypeWrapper(s))
456
+
}
457
+
}
458
+
459
+
#[derive(Clone, Debug, Queryable, Selectable, Identifiable)]
460
+
#[diesel(table_name = crate::schema::records)]
461
+
#[diesel(primary_key(id))]
462
+
#[diesel(check_for_backend(diesel::pg::Pg))]
463
+
pub struct Record {
464
+
pub id: i64,
465
+
pub actor_id: i32,
466
+
pub collection: RecordTypeWrapper,
467
+
pub rkey: String,
468
+
pub cid: Vec<u8>, // 32-byte digest (header stripped)
469
+
pub indexed_at: NaiveDateTime,
470
+
}
471
+
472
+
#[derive(Clone, Debug, Queryable, Selectable, Identifiable)]
473
+
#[diesel(table_name = crate::schema::likes)]
474
+
#[diesel(primary_key(id))]
475
+
#[diesel(check_for_backend(diesel::pg::Pg))]
476
+
pub struct Like {
477
+
pub id: i64,
478
+
pub rkey: String,
479
+
pub actor_id: i32,
480
+
pub subject_record_id: i64,
481
+
pub created_at: DateTime<Utc>,
482
+
pub indexed_at: NaiveDateTime,
483
+
pub via_record_id: Option<i64>,
484
+
}
485
+
486
+
#[derive(Clone, Debug, Queryable, Selectable, Identifiable)]
487
+
#[diesel(table_name = crate::schema::reposts)]
488
+
#[diesel(primary_key(id))]
489
+
#[diesel(check_for_backend(diesel::pg::Pg))]
490
+
pub struct Repost {
491
+
pub id: i64,
492
+
pub rkey: String,
493
+
pub actor_id: i32,
494
+
pub post_record_id: i64,
495
+
pub created_at: DateTime<Utc>,
496
+
pub indexed_at: NaiveDateTime,
497
+
pub via_record_id: Option<i64>,
498
+
}
499
+
500
+
#[derive(Clone, Debug, Queryable, Selectable, Identifiable)]
501
+
#[diesel(table_name = crate::schema::follows)]
502
+
#[diesel(primary_key(id))]
503
+
#[diesel(check_for_backend(diesel::pg::Pg))]
504
+
pub struct Follow {
505
+
pub id: i64,
506
+
pub rkey: String,
507
+
pub actor_id: i32,
508
+
pub subject_actor_id: i32,
509
+
pub created_at: DateTime<Utc>,
510
+
}
511
+
512
+
#[derive(Clone, Debug, Queryable, Selectable, Identifiable)]
513
+
#[diesel(table_name = crate::schema::blocks)]
514
+
#[diesel(primary_key(id))]
515
+
#[diesel(check_for_backend(diesel::pg::Pg))]
516
+
pub struct Block {
517
+
pub id: i64,
518
+
pub rkey: String,
519
+
pub actor_id: i32,
520
+
pub subject_actor_id: i32,
521
+
pub created_at: DateTime<Utc>,
443
522
}
444
523
445
524
pub use not_null_vec::TextArray;
+63
-126
parakeet-db/src/schema.rs
+63
-126
parakeet-db/src/schema.rs
···
2
2
3
3
pub mod sql_types {
4
4
#[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)]
5
+
#[diesel(postgres_type(name = "record_type"))]
6
+
pub struct RecordType;
7
+
8
+
#[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)]
5
9
#[diesel(postgres_type(name = "tsvector", schema = "pg_catalog"))]
6
10
pub struct Tsvector;
7
11
}
8
12
9
13
diesel::table! {
10
-
actor_pds_mapping (did) {
11
-
did -> Text,
12
-
pds_host -> Text,
13
-
resolved_at -> Timestamp,
14
-
resolution_method -> Text,
15
-
confidence -> Text,
16
-
}
17
-
}
18
-
19
-
diesel::table! {
20
-
actors (did) {
14
+
actors (id) {
21
15
did -> Text,
22
16
handle -> Nullable<Text>,
23
17
status -> Text,
···
25
19
repo_rev -> Nullable<Text>,
26
20
repo_cid -> Nullable<Text>,
27
21
last_indexed -> Nullable<Timestamp>,
22
+
id -> Int4,
28
23
}
29
24
}
30
25
···
49
44
}
50
45
51
46
diesel::table! {
52
-
backfill_jobs (id) {
53
-
id -> Int4,
54
-
did -> Text,
55
-
since -> Nullable<Text>,
56
-
status -> Text,
57
-
created_at -> Timestamp,
58
-
updated_at -> Timestamp,
59
-
attempts -> Int4,
60
-
max_attempts -> Int4,
61
-
last_error -> Nullable<Text>,
62
-
retry_after -> Nullable<Timestamp>,
63
-
}
64
-
}
65
-
66
-
diesel::table! {
67
-
blocks (did, rkey) {
47
+
blocks (id) {
48
+
id -> Int8,
68
49
rkey -> Text,
69
-
did -> Text,
70
-
subject -> Text,
50
+
actor_id -> Int4,
51
+
subject_actor_id -> Int4,
71
52
created_at -> Timestamptz,
72
53
}
73
54
}
···
93
74
}
94
75
95
76
diesel::table! {
96
-
cursors (cursor_key) {
97
-
cursor_key -> Text,
98
-
timestamp_us -> Int8,
99
-
updated_at -> Timestamptz,
100
-
}
101
-
}
102
-
103
-
diesel::table! {
104
77
feedgens (at_uri) {
105
78
at_uri -> Text,
106
79
cid -> Text,
···
118
91
}
119
92
120
93
diesel::table! {
121
-
follows (did, rkey) {
94
+
follows (id) {
95
+
id -> Int8,
122
96
rkey -> Text,
123
-
did -> Text,
124
-
subject -> Text,
97
+
actor_id -> Int4,
98
+
subject_actor_id -> Int4,
125
99
created_at -> Timestamptz,
126
100
}
127
101
}
···
168
142
}
169
143
170
144
diesel::table! {
171
-
likes (did, rkey) {
145
+
likes (id) {
146
+
id -> Int8,
172
147
rkey -> Text,
173
-
did -> Text,
174
-
subject -> Text,
175
-
subject_cid -> Text,
148
+
actor_id -> Int4,
149
+
subject_record_id -> Int8,
176
150
created_at -> Timestamptz,
177
151
indexed_at -> Timestamp,
178
-
via_uri -> Nullable<Text>,
179
-
via_cid -> Nullable<Text>,
152
+
via_record_id -> Nullable<Int8>,
180
153
}
181
154
}
182
155
···
249
222
diesel::table! {
250
223
notifications (id) {
251
224
id -> Int8,
252
-
uri -> Text,
253
-
recipient_did -> Text,
254
-
author_did -> Text,
225
+
record_id -> Int8,
226
+
recipient_actor_id -> Int4,
227
+
author_actor_id -> Int4,
255
228
reason -> Text,
256
229
reason_subject -> Nullable<Text>,
257
-
cid -> Text,
230
+
cid_digest -> Bytea,
258
231
is_read -> Bool,
259
232
indexed_at -> Timestamp,
260
233
created_at -> Timestamp,
261
-
}
262
-
}
263
-
264
-
diesel::table! {
265
-
pds_hosts (host) {
266
-
host -> Text,
267
-
first_seen -> Timestamp,
268
-
last_seen -> Timestamp,
269
-
record_count -> Int8,
270
-
last_success -> Nullable<Timestamp>,
271
-
last_failure -> Nullable<Timestamp>,
272
-
failure_count -> Int4,
273
-
notes -> Nullable<Text>,
274
234
}
275
235
}
276
236
···
340
300
}
341
301
342
302
diesel::table! {
343
-
use diesel::sql_types::*;
344
-
use super::sql_types::Tsvector;
345
-
346
-
posts (at_uri) {
303
+
posts (id) {
304
+
id -> Int8,
347
305
at_uri -> Text,
348
-
cid -> Text,
349
-
did -> Text,
306
+
cid_digest -> Bytea,
307
+
actor_id -> Int4,
350
308
record -> Jsonb,
351
309
content -> Text,
352
310
facets -> Nullable<Jsonb>,
353
311
languages -> Array<Nullable<Text>>,
354
312
tags -> Array<Nullable<Text>>,
355
-
parent_uri -> Nullable<Text>,
356
-
parent_cid -> Nullable<Text>,
357
-
root_uri -> Nullable<Text>,
358
-
root_cid -> Nullable<Text>,
313
+
parent_record_id -> Nullable<Int8>,
314
+
root_record_id -> Nullable<Int8>,
359
315
embed -> Nullable<Text>,
360
316
embed_subtype -> Nullable<Text>,
361
-
created_at -> Timestamptz,
362
-
indexed_at -> Timestamp,
363
317
mentions -> Nullable<Array<Nullable<Text>>>,
364
318
violates_threadgate -> Bool,
365
-
search_vector -> Nullable<Tsvector>,
319
+
created_at -> Timestamptz,
320
+
indexed_at -> Timestamp,
366
321
}
367
322
}
368
323
···
403
358
}
404
359
405
360
diesel::table! {
406
-
records (at_uri) {
407
-
at_uri -> Text,
361
+
use diesel::sql_types::*;
362
+
use super::sql_types::RecordType;
363
+
364
+
records (id) {
365
+
id -> Int8,
366
+
actor_id -> Int4,
367
+
collection -> RecordType,
368
+
rkey -> Text,
408
369
cid -> Bytea,
409
-
did -> Text,
410
370
indexed_at -> Timestamp,
411
371
}
412
372
}
413
373
414
374
diesel::table! {
415
-
reposts (did, rkey) {
375
+
reposts (id) {
376
+
id -> Int8,
416
377
rkey -> Text,
417
-
did -> Text,
418
-
post -> Text,
419
-
post_cid -> Text,
378
+
actor_id -> Int4,
379
+
post_record_id -> Int8,
420
380
created_at -> Timestamptz,
421
381
indexed_at -> Timestamp,
422
-
via_uri -> Nullable<Text>,
423
-
via_cid -> Nullable<Text>,
382
+
via_record_id -> Nullable<Int8>,
424
383
}
425
384
}
426
385
···
461
420
}
462
421
463
422
diesel::table! {
423
+
thread_mutes (did, thread_root) {
424
+
did -> Text,
425
+
thread_root -> Text,
426
+
created_at -> Timestamptz,
427
+
}
428
+
}
429
+
430
+
diesel::table! {
464
431
threadgates (at_uri) {
465
432
at_uri -> Text,
466
433
cid -> Text,
···
475
442
}
476
443
477
444
diesel::table! {
478
-
thread_mutes (did, thread_root) {
479
-
did -> Text,
480
-
thread_root -> Text,
481
-
created_at -> Timestamptz,
482
-
}
483
-
}
484
-
485
-
diesel::table! {
486
445
verification (at_uri) {
487
446
at_uri -> Text,
488
447
cid -> Text,
···
495
454
}
496
455
}
497
456
498
-
diesel::joinable!(actor_pds_mapping -> pds_hosts (pds_host));
499
-
diesel::joinable!(blocks -> actors (did));
500
-
diesel::joinable!(bookmarks -> actors (did));
501
-
diesel::joinable!(chat_decls -> actors (did));
502
-
diesel::joinable!(feedgens -> actors (owner));
503
-
diesel::joinable!(follows -> actors (did));
457
+
diesel::joinable!(blocks -> actors (actor_id));
458
+
diesel::joinable!(follows -> actors (actor_id));
504
459
diesel::joinable!(labeler_defs -> labelers (labeler));
505
-
diesel::joinable!(labelers -> actors (did));
506
-
diesel::joinable!(likes -> actors (did));
507
-
diesel::joinable!(list_blocks -> actors (did));
508
-
diesel::joinable!(list_mutes -> actors (did));
509
-
diesel::joinable!(lists -> actors (owner));
510
-
diesel::joinable!(mutes -> actors (did));
511
-
diesel::joinable!(notif_decl -> actors (did));
512
-
diesel::joinable!(notification_seens -> actors (did));
513
-
diesel::joinable!(post_embed_ext -> posts (post_uri));
514
-
diesel::joinable!(post_embed_images -> posts (post_uri));
515
-
diesel::joinable!(post_embed_record -> posts (post_uri));
516
-
diesel::joinable!(post_embed_video -> posts (post_uri));
517
-
diesel::joinable!(post_embed_video_captions -> posts (post_uri));
518
-
diesel::joinable!(postgates -> posts (post_uri));
519
-
diesel::joinable!(posts -> actors (did));
520
-
diesel::joinable!(profiles -> actors (did));
521
-
diesel::joinable!(reposts -> actors (did));
522
-
diesel::joinable!(starterpacks -> actors (owner));
523
-
diesel::joinable!(statuses -> actors (did));
524
-
diesel::joinable!(threadgates -> posts (post_uri));
525
-
diesel::joinable!(thread_mutes -> actors (did));
526
-
diesel::joinable!(verification -> actors (verifier));
460
+
diesel::joinable!(likes -> actors (actor_id));
461
+
diesel::joinable!(likes -> records (subject_record_id));
462
+
diesel::joinable!(notifications -> actors (recipient_actor_id));
463
+
diesel::joinable!(notifications -> records (record_id));
464
+
diesel::joinable!(posts -> actors (actor_id));
465
+
diesel::joinable!(records -> actors (actor_id));
466
+
diesel::joinable!(reposts -> actors (actor_id));
467
+
diesel::joinable!(reposts -> records (post_record_id));
527
468
528
469
diesel::allow_tables_to_appear_in_same_query!(
529
-
actor_pds_mapping,
530
470
actors,
531
471
allowlist,
532
472
author_feeds,
533
-
backfill_jobs,
534
473
blocks,
535
474
bookmarks,
536
475
chat_decls,
537
-
cursors,
538
476
feedgens,
539
477
follows,
540
478
labeler_defs,
···
549
487
notif_decl,
550
488
notification_seens,
551
489
notifications,
552
-
pds_hosts,
553
490
post_embed_ext,
554
491
post_embed_images,
555
492
post_embed_record,
···
563
500
reposts,
564
501
starterpacks,
565
502
statuses,
566
-
threadgates,
567
503
thread_mutes,
504
+
threadgates,
568
505
verification,
569
506
);