Rust AppView - highly experimental!

feat: database hacks - big model change

Changed files
+604 -170
migrations
2025-10-29-164300_drop_records_table
2025-10-30-020000_optimize_records_table
parakeet-db
+1 -1
diesel.toml
··· 4 4 [print_schema] 5 5 file = "parakeet-db/src/schema.rs" 6 6 custom_type_derives = ["diesel::query_builder::QueryId"] 7 - patch_file = "fk.patch" 7 + # patch_file = "fk.patch" 8 8 9 9 [migrations_directory] 10 10 dir = "migrations"
-18
fk.patch
··· 1 - diff --git a/parakeet-db/src/schema.rs b/parakeet-db/src/schema.rs 2 - index 59f65d9..a4219d5 100644 3 - --- a/parakeet-db/src/schema.rs 4 - +++ b/parakeet-db/src/schema.rs 5 - @@ -364,11 +364,13 @@ diesel::joinable!(post_embed_images -> posts (post_uri)); 6 - diesel::joinable!(post_embed_record -> posts (post_uri)); 7 - diesel::joinable!(post_embed_video -> posts (post_uri)); 8 - diesel::joinable!(post_embed_video_captions -> posts (post_uri)); 9 - +diesel::joinable!(postgates -> posts (post_uri)); 10 - diesel::joinable!(posts -> actors (did)); 11 - diesel::joinable!(profiles -> actors (did)); 12 - diesel::joinable!(reposts -> actors (did)); 13 - diesel::joinable!(starterpacks -> actors (owner)); 14 - diesel::joinable!(statuses -> actors (did)); 15 - +diesel::joinable!(threadgates -> posts (post_uri)); 16 - diesel::joinable!(verification -> actors (verifier)); 17 - 18 - diesel::allow_tables_to_appear_in_same_query!(
-10
migrations/2025-10-29-164300_drop_records_table/down.sql
··· 1 - -- Recreate the generic records table (if rollback is needed) 2 - CREATE TABLE records ( 3 - at_uri TEXT PRIMARY KEY, 4 - did TEXT NOT NULL, 5 - cid BYTEA NOT NULL, 6 - created_at TIMESTAMP NOT NULL DEFAULT NOW() 7 - ); 8 - 9 - CREATE INDEX idx_records_did ON records(did); 10 - CREATE INDEX idx_records_created_at ON records(created_at);
-3
migrations/2025-10-29-164300_drop_records_table/up.sql
··· 1 - -- Drop the generic records table 2 - -- This table was used for duplicate detection, but we now check collection-specific tables directly 3 - DROP TABLE IF EXISTS records;
+9
migrations/2025-10-30-020000_optimize_records_table/down.sql
-- Rollback: This migration cannot be rolled back safely
--
-- The old tables are dropped as part of the forward migration, so rollback
-- would result in complete data loss.
--
-- If you need to rollback, you must restore from a backup taken before
-- running the migration.

-- A plain `SELECT 'ERROR: ...'` completes successfully, so the migration
-- runner would mark this migration as reverted even though nothing was
-- restored. Raise a real error instead so the rollback aborts loudly and
-- the migration stays recorded as applied.
DO $$
BEGIN
    RAISE EXCEPTION 'This migration cannot be rolled back. Restore from backup instead.';
END $$;
+440
migrations/2025-10-30-020000_optimize_records_table/up.sql
-- Comprehensive storage optimization: Normalize DIDs, optimize CIDs, use FKs to records
--
-- This migration optimizes ALL major tables in one pass:
-- - records: Remove at_uri, use actor_id + collection ENUM + rkey
-- - likes: Replace did + subject + subject_cid with actor_id + subject_record_id
-- - reposts: Replace did + post + post_cid with actor_id + post_record_id
-- - posts: Replace did + parent/root URIs with actor_id + parent/root_record_id, strip CID headers
-- - follows: Replace did with actor_id
-- - blocks: Replace did with actor_id
-- - notifications: Replace DIDs with actor_ids, strip CID header
--
-- Total storage savings: ~6-8 GB (17-22% reduction)
--
-- NOTE(review): this migration is destructive and lossy by design — every
-- *_old table is dropped at the end of its section, and any row whose actor
-- or referenced record cannot be resolved via an INNER JOIN is silently
-- discarded. Take a full backup before running; down.sql cannot restore this.

-- Step 1: Ensure actors table has id column
-- Idempotent: only adds the surrogate key when it does not already exist.
DO $$
BEGIN
    IF NOT EXISTS (SELECT 1 FROM information_schema.columns
                   WHERE table_name = 'actors' AND column_name = 'id') THEN
        ALTER TABLE actors ADD COLUMN id SERIAL UNIQUE;
    END IF;
END $$;

-- Step 2: Create record_type ENUM
-- Extract all unique collection types from existing records
-- NOTE(review): if `records` is empty, collection_types is NULL and this
-- builds `CREATE TYPE record_type AS ENUM ()` (an empty enum) — later casts
-- would then fail on first insert. Confirm records is populated before running.
DO $$
DECLARE
    collection_types TEXT[];
BEGIN
    -- Get all unique collection types
    SELECT array_agg(DISTINCT SPLIT_PART(at_uri, '/', 4) ORDER BY SPLIT_PART(at_uri, '/', 4))
    INTO collection_types
    FROM records
    WHERE at_uri IS NOT NULL;

    -- Create enum type if it doesn't exist
    IF NOT EXISTS (SELECT 1 FROM pg_type WHERE typname = 'record_type') THEN
        EXECUTE format('CREATE TYPE record_type AS ENUM (%s)',
            (SELECT string_agg(quote_literal(t), ', ') FROM unnest(collection_types) t)
        );
    END IF;
END $$;

-- Step 3: Create new optimized records table
CREATE TABLE records_new (
    id BIGSERIAL PRIMARY KEY,
    actor_id INTEGER NOT NULL,
    collection record_type NOT NULL, -- PostgreSQL ENUM (4 bytes)
    rkey TEXT NOT NULL,
    cid BYTEA NOT NULL, -- 32 bytes (header stripped), not 36
    indexed_at TIMESTAMP NOT NULL
);

-- Step 4: Copy and transform data in a single pass
-- This is the expensive operation but happens once
-- NOTE(review): at_uri is assumed to be `at://did/collection/rkey`, so '/'
-- parts 4 and 5 are collection and rkey — TODO confirm no malformed URIs.
INSERT INTO records_new (actor_id, collection, rkey, cid, indexed_at)
SELECT
    a.id,
    SPLIT_PART(r.at_uri, '/', 4)::record_type as collection,
    SPLIT_PART(r.at_uri, '/', 5) as rkey,
    -- Strip 4-byte CID header (0x01711220), keep only 32-byte digest
    -- NOTE(review): assumes every cid is a 36-byte CIDv1 dag-cbor/sha2-256;
    -- any other codec or hash length would be mangled — verify before running.
    substring(r.cid from 5) as cid_digest,
    r.indexed_at
FROM records r
INNER JOIN actors a ON r.did = a.did
WHERE a.id IS NOT NULL; -- Only migrate records where actor has ID

-- Step 5: Create indexes on new table
CREATE UNIQUE INDEX idx_records_new_composite ON records_new(actor_id, collection, rkey);
CREATE INDEX idx_records_new_actor_id ON records_new(actor_id);
CREATE INDEX idx_records_new_collection ON records_new(collection);
CREATE INDEX idx_records_new_indexed_at ON records_new(indexed_at);

-- Step 6: Add foreign key constraint
ALTER TABLE records_new
    ADD CONSTRAINT fk_records_actor
    FOREIGN KEY (actor_id) REFERENCES actors(id) ON DELETE CASCADE;

-- Step 7: Swap tables (brief exclusive lock)
ALTER TABLE records RENAME TO records_old;
ALTER TABLE records_new RENAME TO records;

-- Step 8: Update actors primary key to use id instead of did
-- NOTE(review): the CASCADE here silently drops every FK in the database
-- that referenced actors(did) — audit dependent constraints before running.
ALTER TABLE actors DROP CONSTRAINT IF EXISTS actors_pkey CASCADE;
ALTER TABLE actors ADD PRIMARY KEY (id);
CREATE UNIQUE INDEX idx_actors_did ON actors(did);

-- Step 8a: Drop old records table immediately
DROP TABLE records_old;

--------------------------------------------------------------------------------
-- LIKES TABLE OPTIMIZATION
--------------------------------------------------------------------------------

-- Step 9: Create optimized likes table
CREATE TABLE likes_new (
    id BIGSERIAL PRIMARY KEY,
    rkey TEXT NOT NULL,
    actor_id INTEGER NOT NULL,
    subject_record_id BIGINT NOT NULL,
    created_at TIMESTAMPTZ NOT NULL,
    indexed_at TIMESTAMP NOT NULL,
    via_record_id BIGINT -- NULL if no via
);

-- Step 10: Transform likes data
-- Strategy: Parse subject URI, join to records via composite key
-- NOTE(review): the INNER JOINs drop likes whose subject record was not
-- migrated; the LEFT JOINs quietly null out via_record_id when the via
-- record is missing — no count of dropped rows is reported.
INSERT INTO likes_new (rkey, actor_id, subject_record_id, created_at, indexed_at, via_record_id)
SELECT
    l.rkey,
    a.id,
    r_subject.id,
    l.created_at,
    l.indexed_at,
    r_via.id
FROM likes l
INNER JOIN actors a ON l.did = a.did
-- Parse subject URI and find matching record
INNER JOIN actors actors_subj ON SPLIT_PART(l.subject, '/', 3) = actors_subj.did
INNER JOIN records r_subject ON
    r_subject.actor_id = actors_subj.id
    AND r_subject.collection::text = SPLIT_PART(l.subject, '/', 4)
    AND r_subject.rkey = SPLIT_PART(l.subject, '/', 5)
-- Parse via_uri and find matching record if exists
LEFT JOIN actors actors_via ON l.via_uri IS NOT NULL AND SPLIT_PART(l.via_uri, '/', 3) = actors_via.did
LEFT JOIN records r_via ON
    r_via.actor_id = actors_via.id
    AND r_via.collection::text = SPLIT_PART(l.via_uri, '/', 4)
    AND r_via.rkey = SPLIT_PART(l.via_uri, '/', 5)
WHERE a.id IS NOT NULL;

-- Step 11: Create indexes on likes_new
CREATE UNIQUE INDEX idx_likes_new_actor_rkey ON likes_new(actor_id, rkey);
CREATE INDEX idx_likes_new_subject ON likes_new(subject_record_id);
CREATE INDEX idx_likes_new_indexed_at ON likes_new(indexed_at);
CREATE INDEX idx_likes_new_via ON likes_new(via_record_id) WHERE via_record_id IS NOT NULL;

-- Step 12: Add foreign key constraints for likes
ALTER TABLE likes_new
    ADD CONSTRAINT fk_likes_actor FOREIGN KEY (actor_id) REFERENCES actors(id) ON DELETE CASCADE,
    ADD CONSTRAINT fk_likes_subject FOREIGN KEY (subject_record_id) REFERENCES records(id) ON DELETE CASCADE,
    ADD CONSTRAINT fk_likes_via FOREIGN KEY (via_record_id) REFERENCES records(id) ON DELETE SET NULL;

-- Step 13: Swap likes tables
ALTER TABLE likes RENAME TO likes_old;
ALTER TABLE likes_new RENAME TO likes;

-- Step 13a: Drop old likes table immediately
DROP TABLE likes_old;

--------------------------------------------------------------------------------
-- REPOSTS TABLE OPTIMIZATION
--------------------------------------------------------------------------------

-- Step 14: Create optimized reposts table
CREATE TABLE reposts_new (
    id BIGSERIAL PRIMARY KEY,
    rkey TEXT NOT NULL,
    actor_id INTEGER NOT NULL,
    post_record_id BIGINT NOT NULL,
    created_at TIMESTAMPTZ NOT NULL,
    indexed_at TIMESTAMP NOT NULL,
    via_record_id BIGINT -- NULL if no via
);

-- Step 15: Transform reposts data
-- Same join strategy (and the same silent-drop caveat) as the likes step.
INSERT INTO reposts_new (rkey, actor_id, post_record_id, created_at, indexed_at, via_record_id)
SELECT
    rp.rkey,
    a.id,
    r_post.id,
    rp.created_at,
    rp.indexed_at,
    r_via.id
FROM reposts rp
INNER JOIN actors a ON rp.did = a.did
-- Parse post URI and find matching record
INNER JOIN actors actors_post ON SPLIT_PART(rp.post, '/', 3) = actors_post.did
INNER JOIN records r_post ON
    r_post.actor_id = actors_post.id
    AND r_post.collection::text = SPLIT_PART(rp.post, '/', 4)
    AND r_post.rkey = SPLIT_PART(rp.post, '/', 5)
-- Parse via_uri and find matching record if exists
LEFT JOIN actors actors_via ON rp.via_uri IS NOT NULL AND SPLIT_PART(rp.via_uri, '/', 3) = actors_via.did
LEFT JOIN records r_via ON
    r_via.actor_id = actors_via.id
    AND r_via.collection::text = SPLIT_PART(rp.via_uri, '/', 4)
    AND r_via.rkey = SPLIT_PART(rp.via_uri, '/', 5)
WHERE a.id IS NOT NULL;

-- Step 16: Create indexes on reposts_new
CREATE UNIQUE INDEX idx_reposts_new_actor_rkey ON reposts_new(actor_id, rkey);
CREATE INDEX idx_reposts_new_post ON reposts_new(post_record_id);
CREATE INDEX idx_reposts_new_indexed_at ON reposts_new(indexed_at);
CREATE INDEX idx_reposts_new_via ON reposts_new(via_record_id) WHERE via_record_id IS NOT NULL;

-- Step 17: Add foreign key constraints for reposts
ALTER TABLE reposts_new
    ADD CONSTRAINT fk_reposts_actor FOREIGN KEY (actor_id) REFERENCES actors(id) ON DELETE CASCADE,
    ADD CONSTRAINT fk_reposts_post FOREIGN KEY (post_record_id) REFERENCES records(id) ON DELETE CASCADE,
    ADD CONSTRAINT fk_reposts_via FOREIGN KEY (via_record_id) REFERENCES records(id) ON DELETE SET NULL;

-- Step 18: Swap reposts tables
ALTER TABLE reposts RENAME TO reposts_old;
ALTER TABLE reposts_new RENAME TO reposts;

-- Step 18a: Drop old reposts table immediately
DROP TABLE reposts_old;

--------------------------------------------------------------------------------
-- POSTS TABLE OPTIMIZATION
--------------------------------------------------------------------------------

-- Step 19: Create optimized posts table
CREATE TABLE posts_new (
    id BIGSERIAL PRIMARY KEY,
    at_uri TEXT NOT NULL UNIQUE, -- Keep at_uri as it's referenced by many tables
    cid_digest BYTEA NOT NULL, -- Strip header, 32 bytes
    actor_id INTEGER NOT NULL,
    record JSONB NOT NULL,

    content TEXT NOT NULL,
    facets JSONB,
    languages TEXT[] NOT NULL,
    tags TEXT[] NOT NULL,

    parent_record_id BIGINT, -- FK to records instead of URI+CID
    root_record_id BIGINT, -- FK to records instead of URI+CID

    embed TEXT,
    embed_subtype TEXT,

    mentions TEXT[],
    violates_threadgate BOOLEAN NOT NULL DEFAULT false,

    created_at TIMESTAMPTZ NOT NULL,
    indexed_at TIMESTAMP NOT NULL
);

-- Step 20: Transform posts data
-- Note: posts.cid is TEXT (base32), we need to convert to binary digest
INSERT INTO posts_new (at_uri, cid_digest, actor_id, record, content, facets, languages, tags,
                       parent_record_id, root_record_id, embed, embed_subtype, mentions,
                       violates_threadgate, created_at, indexed_at)
SELECT
    p.at_uri,
    -- NOTE(review): this assumes posts.cid is HEX text with an 8-char header;
    -- decode(..., 'hex') raises on any non-hex character, and AT Protocol CIDs
    -- are commonly base32 ("bafy..."), which is not hex-decodable. Confirm the
    -- actual stored encoding before running — this line aborts the migration
    -- on base32 input.
    decode(substring(p.cid from 9), 'hex'), -- Strip text CID header, store binary digest
    a.id,
    p.record,
    p.content,
    p.facets,
    p.languages,
    p.tags,
    r_parent.id,
    r_root.id,
    p.embed,
    p.embed_subtype,
    p.mentions,
    p.violates_threadgate,
    p.created_at,
    p.indexed_at
FROM posts p
INNER JOIN actors a ON p.did = a.did
-- Parse parent_uri and find matching record if exists
LEFT JOIN actors actors_parent ON p.parent_uri IS NOT NULL AND SPLIT_PART(p.parent_uri, '/', 3) = actors_parent.did
LEFT JOIN records r_parent ON
    r_parent.actor_id = actors_parent.id
    AND r_parent.collection::text = SPLIT_PART(p.parent_uri, '/', 4)
    AND r_parent.rkey = SPLIT_PART(p.parent_uri, '/', 5)
-- Parse root_uri and find matching record if exists
LEFT JOIN actors actors_root ON p.root_uri IS NOT NULL AND SPLIT_PART(p.root_uri, '/', 3) = actors_root.did
LEFT JOIN records r_root ON
    r_root.actor_id = actors_root.id
    AND r_root.collection::text = SPLIT_PART(p.root_uri, '/', 4)
    AND r_root.rkey = SPLIT_PART(p.root_uri, '/', 5)
WHERE a.id IS NOT NULL;

-- Step 21: Create indexes on posts_new
-- NOTE(review): the old posts table had a search_vector/tsvector index that
-- is not recreated here — confirm full-text search is intentionally dropped.
CREATE INDEX idx_posts_new_actor ON posts_new(actor_id);
CREATE INDEX idx_posts_new_created_at ON posts_new(created_at);
CREATE INDEX idx_posts_new_indexed_at ON posts_new(indexed_at);
CREATE INDEX idx_posts_new_parent ON posts_new(parent_record_id) WHERE parent_record_id IS NOT NULL;
CREATE INDEX idx_posts_new_root ON posts_new(root_record_id) WHERE root_record_id IS NOT NULL;

-- Step 22: Add foreign key constraints for posts
ALTER TABLE posts_new
    ADD CONSTRAINT fk_posts_actor FOREIGN KEY (actor_id) REFERENCES actors(id) ON DELETE CASCADE,
    ADD CONSTRAINT fk_posts_parent FOREIGN KEY (parent_record_id) REFERENCES records(id) ON DELETE SET NULL,
    ADD CONSTRAINT fk_posts_root FOREIGN KEY (root_record_id) REFERENCES records(id) ON DELETE SET NULL;

-- Step 23: Swap posts tables
ALTER TABLE posts RENAME TO posts_old;
ALTER TABLE posts_new RENAME TO posts;

-- Step 23a: Drop old posts table immediately
-- NOTE(review): post_embed_* / threadgates / postgates referenced the old
-- posts table by at_uri; verify their constraints survive this drop.
DROP TABLE posts_old;

--------------------------------------------------------------------------------
-- FOLLOWS TABLE OPTIMIZATION
--------------------------------------------------------------------------------

-- Step 24: Create optimized follows table
CREATE TABLE follows_new (
    id BIGSERIAL PRIMARY KEY,
    rkey TEXT NOT NULL,
    actor_id INTEGER NOT NULL,
    subject_actor_id INTEGER NOT NULL,
    created_at TIMESTAMPTZ NOT NULL,
    UNIQUE(actor_id, rkey)
);

-- Step 25: Transform follows data
-- Follows whose subject DID has no actors row are silently dropped.
INSERT INTO follows_new (rkey, actor_id, subject_actor_id, created_at)
SELECT
    f.rkey,
    a.id,
    a_subject.id,
    f.created_at
FROM follows f
INNER JOIN actors a ON f.did = a.did
INNER JOIN actors a_subject ON f.subject = a_subject.did
WHERE a.id IS NOT NULL AND a_subject.id IS NOT NULL;

-- Step 26: Create indexes on follows_new
CREATE INDEX idx_follows_new_subject ON follows_new(subject_actor_id);
CREATE INDEX idx_follows_new_created_at ON follows_new(created_at);

-- Step 27: Add foreign key constraints for follows
ALTER TABLE follows_new
    ADD CONSTRAINT fk_follows_actor FOREIGN KEY (actor_id) REFERENCES actors(id) ON DELETE CASCADE,
    ADD CONSTRAINT fk_follows_subject FOREIGN KEY (subject_actor_id) REFERENCES actors(id) ON DELETE CASCADE;

-- Step 28: Swap follows tables
ALTER TABLE follows RENAME TO follows_old;
ALTER TABLE follows_new RENAME TO follows;

-- Step 28a: Drop old follows table immediately
DROP TABLE follows_old;

--------------------------------------------------------------------------------
-- BLOCKS TABLE OPTIMIZATION
--------------------------------------------------------------------------------

-- Step 29: Create optimized blocks table
CREATE TABLE blocks_new (
    id BIGSERIAL PRIMARY KEY,
    rkey TEXT NOT NULL,
    actor_id INTEGER NOT NULL,
    subject_actor_id INTEGER NOT NULL,
    created_at TIMESTAMPTZ NOT NULL,
    UNIQUE(actor_id, rkey)
);

-- Step 30: Transform blocks data
-- Blocks whose subject DID has no actors row are silently dropped.
INSERT INTO blocks_new (rkey, actor_id, subject_actor_id, created_at)
SELECT
    b.rkey,
    a.id,
    a_subject.id,
    b.created_at
FROM blocks b
INNER JOIN actors a ON b.did = a.did
INNER JOIN actors a_subject ON b.subject = a_subject.did
WHERE a.id IS NOT NULL AND a_subject.id IS NOT NULL;

-- Step 31: Create indexes on blocks_new
CREATE INDEX idx_blocks_new_subject ON blocks_new(subject_actor_id);
CREATE INDEX idx_blocks_new_created_at ON blocks_new(created_at);

-- Step 32: Add foreign key constraints for blocks
ALTER TABLE blocks_new
    ADD CONSTRAINT fk_blocks_actor FOREIGN KEY (actor_id) REFERENCES actors(id) ON DELETE CASCADE,
    ADD CONSTRAINT fk_blocks_subject FOREIGN KEY (subject_actor_id) REFERENCES actors(id) ON DELETE CASCADE;

-- Step 33: Swap blocks tables
ALTER TABLE blocks RENAME TO blocks_old;
ALTER TABLE blocks_new RENAME TO blocks;

-- Step 33a: Drop old blocks table immediately
DROP TABLE blocks_old;


--------------------------------------------------------------------------------
-- NOTIFICATIONS TABLE OPTIMIZATION
--------------------------------------------------------------------------------

-- Step 34: Create optimized notifications table
CREATE TABLE notifications_new (
    id BIGSERIAL PRIMARY KEY,
    record_id BIGINT NOT NULL, -- FK to records instead of URI
    recipient_actor_id INTEGER NOT NULL,
    author_actor_id INTEGER NOT NULL,
    reason TEXT NOT NULL,
    reason_subject TEXT,
    cid_digest BYTEA NOT NULL, -- Strip header, 32 bytes
    is_read BOOLEAN NOT NULL DEFAULT false,
    indexed_at TIMESTAMP NOT NULL,
    created_at TIMESTAMP NOT NULL
);

-- Step 35: Transform notifications data
-- Notifications whose source record was not migrated are silently dropped
-- by the INNER JOIN on records.
INSERT INTO notifications_new (record_id, recipient_actor_id, author_actor_id, reason,
                               reason_subject, cid_digest, is_read, indexed_at, created_at)
SELECT
    r.id,
    a_recipient.id,
    a_author.id,
    n.reason,
    n.reason_subject,
    -- NOTE(review): same hex-text CID assumption as the posts step — confirm
    -- notifications.cid encoding; base32 input makes decode(...,'hex') raise.
    decode(substring(n.cid from 9), 'hex'), -- Strip text CID header
    n.is_read,
    n.indexed_at,
    n.created_at
FROM notifications n
INNER JOIN actors a_recipient ON n.recipient_did = a_recipient.did
INNER JOIN actors a_author ON n.author_did = a_author.did
-- Parse notification URI and find matching record
INNER JOIN actors actors_record ON SPLIT_PART(n.uri, '/', 3) = actors_record.did
INNER JOIN records r ON
    r.actor_id = actors_record.id
    AND r.collection::text = SPLIT_PART(n.uri, '/', 4)
    AND r.rkey = SPLIT_PART(n.uri, '/', 5)
WHERE a_recipient.id IS NOT NULL AND a_author.id IS NOT NULL;

-- Step 36: Create indexes on notifications_new
CREATE INDEX idx_notifications_new_recipient ON notifications_new(recipient_actor_id);
CREATE INDEX idx_notifications_new_author ON notifications_new(author_actor_id);
CREATE INDEX idx_notifications_new_record ON notifications_new(record_id);
CREATE INDEX idx_notifications_new_indexed_at ON notifications_new(indexed_at);

-- Step 37: Add foreign key constraints for notifications
ALTER TABLE notifications_new
    ADD CONSTRAINT fk_notifications_recipient FOREIGN KEY (recipient_actor_id) REFERENCES actors(id) ON DELETE CASCADE,
    ADD CONSTRAINT fk_notifications_author FOREIGN KEY (author_actor_id) REFERENCES actors(id) ON DELETE CASCADE,
    ADD CONSTRAINT fk_notifications_record FOREIGN KEY (record_id) REFERENCES records(id) ON DELETE CASCADE;

-- Step 38: Swap notifications tables
ALTER TABLE notifications RENAME TO notifications_old;
ALTER TABLE notifications_new RENAME TO notifications;

-- Step 38a: Drop old notifications table immediately
DROP TABLE notifications_old;
+91 -12
parakeet-db/src/models.rs
··· 5 5 6 6 #[derive(Debug, Queryable, Selectable, Identifiable)] 7 7 #[diesel(table_name = crate::schema::actors)] 8 - #[diesel(primary_key(did))] 8 + #[diesel(primary_key(id))] 9 9 #[diesel(check_for_backend(diesel::pg::Pg))] 10 10 pub struct Actor { 11 11 pub did: String, ··· 15 15 pub repo_rev: Option<String>, 16 16 pub repo_cid: Option<String>, 17 17 pub last_indexed: Option<NaiveDateTime>, 18 + pub id: i32, 18 19 } 19 20 20 21 #[derive(Clone, Debug, Serialize, Deserialize, Queryable, Selectable, Identifiable)] ··· 114 115 115 116 #[derive(Clone, Debug, Serialize, Deserialize, Queryable, Selectable, Identifiable)] 116 117 #[diesel(table_name = crate::schema::posts)] 117 - #[diesel(primary_key(at_uri))] 118 + #[diesel(primary_key(id))] 118 119 #[diesel(check_for_backend(diesel::pg::Pg))] 119 120 pub struct Post { 121 + pub id: i64, 120 122 pub at_uri: String, 121 - pub cid: String, 122 - pub did: String, 123 + pub cid_digest: Vec<u8>, 124 + pub actor_id: i32, 123 125 pub record: serde_json::Value, 124 126 125 127 pub content: String, ··· 127 129 pub languages: not_null_vec::TextArray, 128 130 pub tags: not_null_vec::TextArray, 129 131 130 - pub parent_uri: Option<String>, 131 - pub parent_cid: Option<String>, 132 - pub root_uri: Option<String>, 133 - pub root_cid: Option<String>, 132 + pub parent_record_id: Option<i64>, 133 + pub root_record_id: Option<i64>, 134 134 135 135 pub embed: Option<String>, 136 136 pub embed_subtype: Option<String>, ··· 347 347 #[diesel(check_for_backend(diesel::pg::Pg))] 348 348 pub struct Notification { 349 349 pub id: i64, 350 - pub uri: String, 351 - pub recipient_did: String, 352 - pub author_did: String, 350 + pub record_id: i64, 351 + pub recipient_actor_id: i32, 352 + pub author_actor_id: i32, 353 353 pub reason: String, 354 354 pub reason_subject: Option<String>, 355 - pub cid: String, 355 + pub cid_digest: Vec<u8>, 356 356 pub is_read: bool, 357 357 pub indexed_at: NaiveDateTime, 358 358 pub created_at: NaiveDateTime, ··· 
440 440 pub did: String, 441 441 pub typ: String, 442 442 pub sort_at: DateTime<Utc>, 443 + } 444 + 445 + // Optimized table models 446 + 447 + // Wrapper type for RecordType to allow String deserialization 448 + #[derive(Debug, Clone, diesel::expression::AsExpression, diesel::deserialize::FromSqlRow)] 449 + #[diesel(sql_type = crate::schema::sql_types::RecordType)] 450 + pub struct RecordTypeWrapper(pub String); 451 + 452 + impl diesel::deserialize::FromSql<crate::schema::sql_types::RecordType, diesel::pg::Pg> for RecordTypeWrapper { 453 + fn from_sql(bytes: diesel::pg::PgValue) -> diesel::deserialize::Result<Self> { 454 + let s = <String as diesel::deserialize::FromSql<diesel::sql_types::Text, diesel::pg::Pg>>::from_sql(bytes)?; 455 + Ok(RecordTypeWrapper(s)) 456 + } 457 + } 458 + 459 + #[derive(Clone, Debug, Queryable, Selectable, Identifiable)] 460 + #[diesel(table_name = crate::schema::records)] 461 + #[diesel(primary_key(id))] 462 + #[diesel(check_for_backend(diesel::pg::Pg))] 463 + pub struct Record { 464 + pub id: i64, 465 + pub actor_id: i32, 466 + pub collection: RecordTypeWrapper, 467 + pub rkey: String, 468 + pub cid: Vec<u8>, // 32-byte digest (header stripped) 469 + pub indexed_at: NaiveDateTime, 470 + } 471 + 472 + #[derive(Clone, Debug, Queryable, Selectable, Identifiable)] 473 + #[diesel(table_name = crate::schema::likes)] 474 + #[diesel(primary_key(id))] 475 + #[diesel(check_for_backend(diesel::pg::Pg))] 476 + pub struct Like { 477 + pub id: i64, 478 + pub rkey: String, 479 + pub actor_id: i32, 480 + pub subject_record_id: i64, 481 + pub created_at: DateTime<Utc>, 482 + pub indexed_at: NaiveDateTime, 483 + pub via_record_id: Option<i64>, 484 + } 485 + 486 + #[derive(Clone, Debug, Queryable, Selectable, Identifiable)] 487 + #[diesel(table_name = crate::schema::reposts)] 488 + #[diesel(primary_key(id))] 489 + #[diesel(check_for_backend(diesel::pg::Pg))] 490 + pub struct Repost { 491 + pub id: i64, 492 + pub rkey: String, 493 + pub actor_id: i32, 494 
+ pub post_record_id: i64, 495 + pub created_at: DateTime<Utc>, 496 + pub indexed_at: NaiveDateTime, 497 + pub via_record_id: Option<i64>, 498 + } 499 + 500 + #[derive(Clone, Debug, Queryable, Selectable, Identifiable)] 501 + #[diesel(table_name = crate::schema::follows)] 502 + #[diesel(primary_key(id))] 503 + #[diesel(check_for_backend(diesel::pg::Pg))] 504 + pub struct Follow { 505 + pub id: i64, 506 + pub rkey: String, 507 + pub actor_id: i32, 508 + pub subject_actor_id: i32, 509 + pub created_at: DateTime<Utc>, 510 + } 511 + 512 + #[derive(Clone, Debug, Queryable, Selectable, Identifiable)] 513 + #[diesel(table_name = crate::schema::blocks)] 514 + #[diesel(primary_key(id))] 515 + #[diesel(check_for_backend(diesel::pg::Pg))] 516 + pub struct Block { 517 + pub id: i64, 518 + pub rkey: String, 519 + pub actor_id: i32, 520 + pub subject_actor_id: i32, 521 + pub created_at: DateTime<Utc>, 443 522 } 444 523 445 524 pub use not_null_vec::TextArray;
+63 -126
parakeet-db/src/schema.rs
··· 2 2 3 3 pub mod sql_types { 4 4 #[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)] 5 + #[diesel(postgres_type(name = "record_type"))] 6 + pub struct RecordType; 7 + 8 + #[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)] 5 9 #[diesel(postgres_type(name = "tsvector", schema = "pg_catalog"))] 6 10 pub struct Tsvector; 7 11 } 8 12 9 13 diesel::table! { 10 - actor_pds_mapping (did) { 11 - did -> Text, 12 - pds_host -> Text, 13 - resolved_at -> Timestamp, 14 - resolution_method -> Text, 15 - confidence -> Text, 16 - } 17 - } 18 - 19 - diesel::table! { 20 - actors (did) { 14 + actors (id) { 21 15 did -> Text, 22 16 handle -> Nullable<Text>, 23 17 status -> Text, ··· 25 19 repo_rev -> Nullable<Text>, 26 20 repo_cid -> Nullable<Text>, 27 21 last_indexed -> Nullable<Timestamp>, 22 + id -> Int4, 28 23 } 29 24 } 30 25 ··· 49 44 } 50 45 51 46 diesel::table! { 52 - backfill_jobs (id) { 53 - id -> Int4, 54 - did -> Text, 55 - since -> Nullable<Text>, 56 - status -> Text, 57 - created_at -> Timestamp, 58 - updated_at -> Timestamp, 59 - attempts -> Int4, 60 - max_attempts -> Int4, 61 - last_error -> Nullable<Text>, 62 - retry_after -> Nullable<Timestamp>, 63 - } 64 - } 65 - 66 - diesel::table! { 67 - blocks (did, rkey) { 47 + blocks (id) { 48 + id -> Int8, 68 49 rkey -> Text, 69 - did -> Text, 70 - subject -> Text, 50 + actor_id -> Int4, 51 + subject_actor_id -> Int4, 71 52 created_at -> Timestamptz, 72 53 } 73 54 } ··· 93 74 } 94 75 95 76 diesel::table! { 96 - cursors (cursor_key) { 97 - cursor_key -> Text, 98 - timestamp_us -> Int8, 99 - updated_at -> Timestamptz, 100 - } 101 - } 102 - 103 - diesel::table! { 104 77 feedgens (at_uri) { 105 78 at_uri -> Text, 106 79 cid -> Text, ··· 118 91 } 119 92 120 93 diesel::table! 
{ 121 - follows (did, rkey) { 94 + follows (id) { 95 + id -> Int8, 122 96 rkey -> Text, 123 - did -> Text, 124 - subject -> Text, 97 + actor_id -> Int4, 98 + subject_actor_id -> Int4, 125 99 created_at -> Timestamptz, 126 100 } 127 101 } ··· 168 142 } 169 143 170 144 diesel::table! { 171 - likes (did, rkey) { 145 + likes (id) { 146 + id -> Int8, 172 147 rkey -> Text, 173 - did -> Text, 174 - subject -> Text, 175 - subject_cid -> Text, 148 + actor_id -> Int4, 149 + subject_record_id -> Int8, 176 150 created_at -> Timestamptz, 177 151 indexed_at -> Timestamp, 178 - via_uri -> Nullable<Text>, 179 - via_cid -> Nullable<Text>, 152 + via_record_id -> Nullable<Int8>, 180 153 } 181 154 } 182 155 ··· 249 222 diesel::table! { 250 223 notifications (id) { 251 224 id -> Int8, 252 - uri -> Text, 253 - recipient_did -> Text, 254 - author_did -> Text, 225 + record_id -> Int8, 226 + recipient_actor_id -> Int4, 227 + author_actor_id -> Int4, 255 228 reason -> Text, 256 229 reason_subject -> Nullable<Text>, 257 - cid -> Text, 230 + cid_digest -> Bytea, 258 231 is_read -> Bool, 259 232 indexed_at -> Timestamp, 260 233 created_at -> Timestamp, 261 - } 262 - } 263 - 264 - diesel::table! { 265 - pds_hosts (host) { 266 - host -> Text, 267 - first_seen -> Timestamp, 268 - last_seen -> Timestamp, 269 - record_count -> Int8, 270 - last_success -> Nullable<Timestamp>, 271 - last_failure -> Nullable<Timestamp>, 272 - failure_count -> Int4, 273 - notes -> Nullable<Text>, 274 234 } 275 235 } 276 236 ··· 340 300 } 341 301 342 302 diesel::table! 
{ 343 - use diesel::sql_types::*; 344 - use super::sql_types::Tsvector; 345 - 346 - posts (at_uri) { 303 + posts (id) { 304 + id -> Int8, 347 305 at_uri -> Text, 348 - cid -> Text, 349 - did -> Text, 306 + cid_digest -> Bytea, 307 + actor_id -> Int4, 350 308 record -> Jsonb, 351 309 content -> Text, 352 310 facets -> Nullable<Jsonb>, 353 311 languages -> Array<Nullable<Text>>, 354 312 tags -> Array<Nullable<Text>>, 355 - parent_uri -> Nullable<Text>, 356 - parent_cid -> Nullable<Text>, 357 - root_uri -> Nullable<Text>, 358 - root_cid -> Nullable<Text>, 313 + parent_record_id -> Nullable<Int8>, 314 + root_record_id -> Nullable<Int8>, 359 315 embed -> Nullable<Text>, 360 316 embed_subtype -> Nullable<Text>, 361 - created_at -> Timestamptz, 362 - indexed_at -> Timestamp, 363 317 mentions -> Nullable<Array<Nullable<Text>>>, 364 318 violates_threadgate -> Bool, 365 - search_vector -> Nullable<Tsvector>, 319 + created_at -> Timestamptz, 320 + indexed_at -> Timestamp, 366 321 } 367 322 } 368 323 ··· 403 358 } 404 359 405 360 diesel::table! { 406 - records (at_uri) { 407 - at_uri -> Text, 361 + use diesel::sql_types::*; 362 + use super::sql_types::RecordType; 363 + 364 + records (id) { 365 + id -> Int8, 366 + actor_id -> Int4, 367 + collection -> RecordType, 368 + rkey -> Text, 408 369 cid -> Bytea, 409 - did -> Text, 410 370 indexed_at -> Timestamp, 411 371 } 412 372 } 413 373 414 374 diesel::table! { 415 - reposts (did, rkey) { 375 + reposts (id) { 376 + id -> Int8, 416 377 rkey -> Text, 417 - did -> Text, 418 - post -> Text, 419 - post_cid -> Text, 378 + actor_id -> Int4, 379 + post_record_id -> Int8, 420 380 created_at -> Timestamptz, 421 381 indexed_at -> Timestamp, 422 - via_uri -> Nullable<Text>, 423 - via_cid -> Nullable<Text>, 382 + via_record_id -> Nullable<Int8>, 424 383 } 425 384 } 426 385 ··· 461 420 } 462 421 463 422 diesel::table! 
{ 423 + thread_mutes (did, thread_root) { 424 + did -> Text, 425 + thread_root -> Text, 426 + created_at -> Timestamptz, 427 + } 428 + } 429 + 430 + diesel::table! { 464 431 threadgates (at_uri) { 465 432 at_uri -> Text, 466 433 cid -> Text, ··· 475 442 } 476 443 477 444 diesel::table! { 478 - thread_mutes (did, thread_root) { 479 - did -> Text, 480 - thread_root -> Text, 481 - created_at -> Timestamptz, 482 - } 483 - } 484 - 485 - diesel::table! { 486 445 verification (at_uri) { 487 446 at_uri -> Text, 488 447 cid -> Text, ··· 495 454 } 496 455 } 497 456 498 - diesel::joinable!(actor_pds_mapping -> pds_hosts (pds_host)); 499 - diesel::joinable!(blocks -> actors (did)); 500 - diesel::joinable!(bookmarks -> actors (did)); 501 - diesel::joinable!(chat_decls -> actors (did)); 502 - diesel::joinable!(feedgens -> actors (owner)); 503 - diesel::joinable!(follows -> actors (did)); 457 + diesel::joinable!(blocks -> actors (actor_id)); 458 + diesel::joinable!(follows -> actors (actor_id)); 504 459 diesel::joinable!(labeler_defs -> labelers (labeler)); 505 - diesel::joinable!(labelers -> actors (did)); 506 - diesel::joinable!(likes -> actors (did)); 507 - diesel::joinable!(list_blocks -> actors (did)); 508 - diesel::joinable!(list_mutes -> actors (did)); 509 - diesel::joinable!(lists -> actors (owner)); 510 - diesel::joinable!(mutes -> actors (did)); 511 - diesel::joinable!(notif_decl -> actors (did)); 512 - diesel::joinable!(notification_seens -> actors (did)); 513 - diesel::joinable!(post_embed_ext -> posts (post_uri)); 514 - diesel::joinable!(post_embed_images -> posts (post_uri)); 515 - diesel::joinable!(post_embed_record -> posts (post_uri)); 516 - diesel::joinable!(post_embed_video -> posts (post_uri)); 517 - diesel::joinable!(post_embed_video_captions -> posts (post_uri)); 518 - diesel::joinable!(postgates -> posts (post_uri)); 519 - diesel::joinable!(posts -> actors (did)); 520 - diesel::joinable!(profiles -> actors (did)); 521 - diesel::joinable!(reposts -> actors 
(did)); 522 - diesel::joinable!(starterpacks -> actors (owner)); 523 - diesel::joinable!(statuses -> actors (did)); 524 - diesel::joinable!(threadgates -> posts (post_uri)); 525 - diesel::joinable!(thread_mutes -> actors (did)); 526 - diesel::joinable!(verification -> actors (verifier)); 460 + diesel::joinable!(likes -> actors (actor_id)); 461 + diesel::joinable!(likes -> records (subject_record_id)); 462 + diesel::joinable!(notifications -> actors (recipient_actor_id)); 463 + diesel::joinable!(notifications -> records (record_id)); 464 + diesel::joinable!(posts -> actors (actor_id)); 465 + diesel::joinable!(records -> actors (actor_id)); 466 + diesel::joinable!(reposts -> actors (actor_id)); 467 + diesel::joinable!(reposts -> records (post_record_id)); 527 468 528 469 diesel::allow_tables_to_appear_in_same_query!( 529 - actor_pds_mapping, 530 470 actors, 531 471 allowlist, 532 472 author_feeds, 533 - backfill_jobs, 534 473 blocks, 535 474 bookmarks, 536 475 chat_decls, 537 - cursors, 538 476 feedgens, 539 477 follows, 540 478 labeler_defs, ··· 549 487 notif_decl, 550 488 notification_seens, 551 489 notifications, 552 - pds_hosts, 553 490 post_embed_ext, 554 491 post_embed_images, 555 492 post_embed_record, ··· 563 500 reposts, 564 501 starterpacks, 565 502 statuses, 566 - threadgates, 567 503 thread_mutes, 504 + threadgates, 568 505 verification, 569 506 );