chore: cleanup migrations · quilling.dev/parakeet@aca2779

+13 -9

migrations/2025-11-01-114839_posts/up.sql

··· 18 18 19 19 -- Language Codes (ISO 639-1 + regional variants) 20 20 -- Includes all language codes from historical database (4.6M posts) 21 + -- Extended with 38 additional codes for broader language coverage 21 22 CREATE TYPE language_code AS ENUM ( 22 23 -- Original 20 codes 23 24 'en', 'es', 'fr', 'de', 'it', 'pt', 'ru', 'ja', 'ko', 'zh', ··· 39 40 'zxx', 'cu', 'en-CA', 'es-CL', 'haw', 'en-UK', 'zh-CN', 'iw', 'tt', 'ba', 40 41 'ms', 'in', 'mt', 'fr-CA', 'oc', 'sco', 'fy', 'nr', 'kl', 'pa', 41 42 'nb-NO', 'oj', 'rm', 'fa-IR', 'da-DK', 'sk-SK', 'ceb', 'su', 'ca-ES', 'mn', 42 - 'ta', 'lg', 'ku', 'tok', 'jp-JP', 'gsw', 'ha', 'wa', 'yue', 'ars' 43 + 'ta', 'lg', 'ku', 'tok', 'jp-JP', 'gsw', 'ha', 'wa', 'yue', 'ars', 44 + -- Extended language codes (4 additional codes for broader coverage) 45 + 'mr', 'te', 'jv', 'kn' 43 46 ); 44 47 45 48 CREATE TYPE facet_type AS ENUM ('mention', 'link', 'tag'); ··· 116 119 rkey INT8 NOT NULL, 117 120 cid BYTEA NOT NULL, 118 121 content BYTEA, 119 - content_version SMALLINT, 120 - language_code language_code, 122 + langs language_code[] NOT NULL DEFAULT '{}', 121 123 tags TEXT[] NOT NULL DEFAULT '{}', 122 124 parent_post_id BIGINT REFERENCES posts(id) ON DELETE SET NULL, 123 125 root_post_id BIGINT REFERENCES posts(id) ON DELETE SET NULL, ··· 127 129 status post_status NOT NULL DEFAULT 'complete', 128 130 search_vector tsvector, 129 131 UNIQUE (actor_id, rkey), 130 - CHECK ( 131 - (content IS NULL AND content_version IS NULL) OR 132 - (content IS NOT NULL AND content_version IS NOT NULL) 133 - ) 132 + CHECK (array_length(langs, 1) IS NULL OR array_length(langs, 1) <= 3) 134 133 ); 135 134 136 - COMMENT ON COLUMN posts.content IS 'Zstd-compressed post content (BYTEA). NULL for stub/deleted posts.'; 137 - COMMENT ON COLUMN posts.content_version IS 'Dictionary version (NULL if no content)'; 135 + COMMENT ON COLUMN posts.content IS 'Zstd-compressed post content (BYTEA). NULL for stub/deleted posts. 
Dictionary version stored as first byte prefix.'; 136 + COMMENT ON COLUMN posts.langs IS 'Languages of post content (ISO 639-1 codes). Max 3 per AT Protocol spec. Empty array means no language specified.'; 138 137 COMMENT ON COLUMN posts.search_vector IS 'Full-text search vector (NULL except for allowlisted users)'; 139 138 COMMENT ON COLUMN posts.status IS 'Post lifecycle state: complete (normal), stub (placeholder, needs fetch), missing (permanently unfetchable), deleted (soft-deleted), forbidden (access denied)'; 140 139 141 140 CREATE INDEX idx_posts_actor ON posts(actor_id); 142 141 CREATE INDEX idx_posts_parent ON posts(parent_post_id) WHERE parent_post_id IS NOT NULL; 143 142 CREATE INDEX idx_posts_root ON posts(root_post_id) WHERE root_post_id IS NOT NULL; 143 + CREATE INDEX idx_posts_langs ON posts USING GIN(langs); 144 144 CREATE INDEX idx_posts_tags ON posts USING GIN(tags); 145 145 CREATE INDEX idx_posts_status ON posts(status) WHERE status != 'complete'; 146 146 CREATE INDEX idx_posts_search_vector ON posts USING GIN (search_vector) WHERE search_vector IS NOT NULL; ··· 152 152 153 153 ALTER TABLE statuses ADD CONSTRAINT fk_statuses_embed_post 154 154 FOREIGN KEY (embed_post_id) REFERENCES posts(id) ON DELETE SET NULL; 155 + 156 + -- Add indexes on foreign keys for improved DELETE performance 157 + CREATE INDEX idx_profiles_pinned_post ON profiles(pinned_post_id) WHERE pinned_post_id IS NOT NULL; 155 158 156 159 -- ============================================================================= 157 160 -- POST EMBEDS ··· 225 228 ); 226 229 227 230 CREATE INDEX idx_post_facets_mention ON post_facets(mention_actor_id) WHERE mention_actor_id IS NOT NULL; 231 + CREATE INDEX idx_post_facets_link_uri ON post_facets(link_uri_id); 228 232 229 233 -- Post Mentions (denormalized for fast lookups) 230 234 CREATE TABLE post_mentions (

+2

migrations/2025-11-01-114841_engagement/up.sql

··· 32 32 CREATE INDEX idx_likes_actor ON likes(actor_id); 33 33 CREATE INDEX idx_likes_subject ON likes(subject_type, subject_id); 34 34 CREATE INDEX idx_likes_rkey ON likes(rkey); 35 + CREATE INDEX idx_likes_via_post ON likes(via_post_id); 35 36 36 37 -- ============================================================================= 37 38 -- REPOSTS ··· 50 51 CREATE INDEX idx_reposts_actor ON reposts(actor_id); 51 52 CREATE INDEX idx_reposts_post ON reposts(post_id); 52 53 CREATE INDEX idx_reposts_rkey ON reposts(rkey); 54 + CREATE INDEX idx_reposts_via_post ON reposts(via_post_id); 53 55 54 56 -- ============================================================================= 55 57 -- BOOKMARKS

+7 -4

migrations/2025-11-01-114842_notifications/down.sql

-- Rollback for the notifications migration.
--
-- Tables go first because their columns use the enum types below;
-- the types can only be removed once nothing references them.
DROP TABLE IF EXISTS
    notification_state,
    notifications,
    notif_decl;

-- Enum types created by up.sql.
DROP TYPE IF EXISTS
    notification_record_type,
    notification_reason,
    notif_allow_subscriptions;

+73 -2

migrations/2025-11-01-114842_notifications/up.sql

-- =============================================================================
-- NOTIFICATIONS
-- =============================================================================
--
-- Notification system: preferences, notification events, and user state
-- Optimized storage with INT8 (TID rkeys) and BYTEA (CID digests)
--
-- =============================================================================

-- =============================================================================
-- NOTIFICATION TYPES
-- =============================================================================

CREATE TYPE notif_allow_subscriptions AS ENUM ('none', 'mutuals', 'followers');

-- Notification reason enum
-- This enum represents why a notification was created
CREATE TYPE notification_reason AS ENUM (
    'like',     -- User liked a post
    'repost',   -- User reposted a post
    'follow',   -- User followed someone
    'reply',    -- User replied to a post
    'mention',  -- User mentioned someone in a post
    'quote'     -- User quoted a post
);

-- Record type enum for notification records
-- This enum represents the type of record that triggered the notification
CREATE TYPE notification_record_type AS ENUM (
    'like',     -- app.bsky.feed.like
    'repost',   -- app.bsky.feed.repost
    'follow',   -- app.bsky.graph.follow
    'post'      -- app.bsky.feed.post (used for reply/mention/quote)
);

-- =============================================================================
-- NOTIFICATION PREFERENCES
-- =============================================================================

-- Notification preferences (one row per actor)
CREATE TABLE notif_decl (
    actor_id INTEGER PRIMARY KEY REFERENCES actors(id) ON DELETE CASCADE,
    allow_subscriptions notif_allow_subscriptions,
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- =============================================================================
-- NOTIFICATIONS TABLE
-- =============================================================================

-- Notifications table with optimized storage.
-- Actors are referenced by integer id; record keys and CIDs are stored in
-- compact binary/integer form (see column comments below).
-- The UNIQUE constraint deduplicates on the triggering record and also
-- provides an index whose leading column is author_actor_id.
CREATE TABLE notifications (
    id BIGSERIAL PRIMARY KEY,
    recipient_actor_id INT NOT NULL REFERENCES actors(id) ON DELETE CASCADE,
    author_actor_id INT NOT NULL REFERENCES actors(id) ON DELETE CASCADE,
    indexed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    reason notification_reason NOT NULL,
    record_type notification_record_type NOT NULL,
    record_rkey INT8 NOT NULL,
    record_cid BYTEA NOT NULL,
    subject_actor_id INT REFERENCES actors(id) ON DELETE CASCADE,
    subject_record_type notification_record_type,
    subject_rkey INT8,
    UNIQUE (author_actor_id, record_type, record_rkey)
);

COMMENT ON COLUMN notifications.record_rkey IS 'TID as INT8 (convert to string via i64_to_tid())';
COMMENT ON COLUMN notifications.record_cid IS '32-byte CID digest (header stripped, reconstruct via digest_to_record_cid())';
COMMENT ON COLUMN notifications.subject_rkey IS 'Optional TID as INT8 for subject record';

-- Indexes for notifications

-- Listing notifications by recipient, newest first (pagination)
CREATE INDEX idx_notifications_recipient_time
    ON notifications(recipient_actor_id, indexed_at DESC);

-- Cleanup worker (delete old notifications)
CREATE INDEX idx_notifications_indexed_at
    ON notifications(indexed_at);

-- Supports the ON DELETE CASCADE from actors via subject_actor_id: without an
-- index on the referencing column, deleting an actor has to scan every
-- notification row. Partial because the column is nullable.
CREATE INDEX idx_notifications_subject_actor
    ON notifications(subject_actor_id)
    WHERE subject_actor_id IS NOT NULL;

-- =============================================================================
-- NOTIFICATION STATE
-- =============================================================================

-- Per-user notification state
-- NOTE(review): unread_count has a default of 0 but is still nullable;
-- confirm whether callers rely on NULL before tightening it to NOT NULL.
CREATE TABLE notification_state (
    actor_id INT PRIMARY KEY REFERENCES actors(id) ON DELETE CASCADE,
    seen_at TIMESTAMPTZ,
    unread_count INTEGER DEFAULT 0
);

+1 -1

migrations/2025-11-01-114846_moderation/up.sql

··· 14 14 CREATE TYPE subject_type AS ENUM ('account', 'record', 'chat'); 15 15 CREATE TYPE label_severity AS ENUM ('inform', 'alert', 'none'); 16 16 CREATE TYPE label_blurs AS ENUM ('content', 'media', 'none'); 17 - CREATE TYPE label_default_setting AS ENUM ('ignore', 'warn', 'hide'); 17 + CREATE TYPE label_default_setting AS ENUM ('ignore', 'warn', 'hide', 'inform', 'show'); 18 18 19 19 -- ============================================================================= 20 20 -- LABELERS

+11

migrations/2025-11-01-114847_queues_and_extensions/down.sql

-- Rollback for the queues & extensions migration.

-- Drop queue tables (reverse of the creation order in up.sql)
DROP TABLE IF EXISTS constellation_enrichment_queue;
DROP TABLE IF EXISTS handle_resolution_queue;
DROP TABLE IF EXISTS jetstream_cursors;
DROP TABLE IF EXISTS backfill_jobs;
DROP TABLE IF EXISTS fetch_queue;

-- Extensions (pgrouting, postgis, pg_stat_statements) are intentionally NOT
-- dropped here. up.sql installs them with CREATE EXTENSION IF NOT EXISTS, so
-- they may already have been present before this migration ran; dropping them
-- on rollback could remove database-wide extensions this migration did not
-- create, and DROP EXTENSION fails outright when other objects depend on the
-- extension, which would abort the whole rollback.
-- If a full teardown is required, drop them manually in dependency order:
-- pgrouting first, then postgis, then pg_stat_statements.

+143

migrations/2025-11-01-114847_queues_and_extensions/up.sql

-- =============================================================================
-- QUEUES & EXTENSIONS
-- =============================================================================
--
-- PostgreSQL extensions plus the queue tables used for async processing.
--
-- =============================================================================

-- -----------------------------------------------------------------------------
-- PostgreSQL extensions
-- -----------------------------------------------------------------------------

-- pg_stat_statements: execution statistics for all SQL statements, used to
-- find slow queries. Loaded through shared_preload_libraries in
-- postgresql.conf.
CREATE EXTENSION IF NOT EXISTS pg_stat_statements;

-- postgis: not used directly by Parakeet; installed only because pgRouting
-- depends on it.
CREATE EXTENSION IF NOT EXISTS postgis;

-- pgrouting: graph algorithms over the follows table, e.g.
--   * shortest path between users (connection discovery)
--   * community detection (strongly connected components)
--   * centrality metrics (influential users, key connectors)
--   * multi-hop neighborhood queries (N-degree connections)
CREATE EXTENSION IF NOT EXISTS pgrouting;

-- Available via flake.nix but deliberately not enabled yet:
--
--   timescaledb - time-series optimization for notifications and stats tables.
--                 Deferred until a schema migration strategy (hypertables,
--                 partitioning) is designed. Future use: trending posts,
--                 notification retention policies, time-bucketing.
--
--   pgvector    - vector similarity search for semantic/ML-based features.
--                 Deferred until a concrete use case emerges. Future use:
--                 content-based recommendations, duplicate detection.

-- -----------------------------------------------------------------------------
-- Fetch queue (replaces the Redis list-based queue)
-- -----------------------------------------------------------------------------

CREATE TABLE fetch_queue (
    id           BIGSERIAL   PRIMARY KEY,
    at_uri       TEXT        NOT NULL UNIQUE,
    attempts     INT         NOT NULL DEFAULT 0,
    max_attempts INT         NOT NULL DEFAULT 3,
    status       TEXT        NOT NULL DEFAULT 'pending',  -- pending | processing | failed
    last_error   TEXT,
    created_at   TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at   TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- Dequeue path: only rows that are still live in the queue are indexed.
CREATE INDEX idx_fetch_queue_status_created
    ON fetch_queue (status, created_at)
    WHERE status IN ('pending', 'processing');

-- -----------------------------------------------------------------------------
-- Backfill jobs (replaces Redis job management)
-- -----------------------------------------------------------------------------

-- status is one of:
--   pending, processing, successful, failed.retry, failed.permanent
CREATE TABLE backfill_jobs (
    did          TEXT        PRIMARY KEY,
    status       TEXT        NOT NULL DEFAULT 'pending',
    attempts     INT         NOT NULL DEFAULT 0,
    max_attempts INT         NOT NULL DEFAULT 3,
    last_error   TEXT,
    scheduled_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),  -- drives exponential backoff
    started_at   TIMESTAMPTZ,
    completed_at TIMESTAMPTZ
);

-- Dequeue path: jobs waiting to run or waiting to be retried.
CREATE INDEX idx_backfill_status_scheduled
    ON backfill_jobs (status, scheduled_at)
    WHERE status IN ('pending', 'failed.retry');

-- Used to find jobs stuck in 'processing' for too long.
CREATE INDEX idx_backfill_processing_stale
    ON backfill_jobs (status, started_at)
    WHERE status = 'processing';

-- -----------------------------------------------------------------------------
-- Jetstream cursors (replaces Redis cursor storage)
-- -----------------------------------------------------------------------------

CREATE TABLE jetstream_cursors (
    partition    TEXT        PRIMARY KEY,            -- e.g., "posts", "likes", "social"
    cursor_value BIGINT      NOT NULL,               -- timestamp in microseconds
    updated_at   TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- -----------------------------------------------------------------------------
-- Handle resolution queue
-- -----------------------------------------------------------------------------

CREATE TABLE handle_resolution_queue (
    id           BIGSERIAL   PRIMARY KEY,
    did          TEXT        NOT NULL UNIQUE,
    attempts     INT         NOT NULL DEFAULT 0,
    max_attempts INT         NOT NULL DEFAULT 3,
    status       TEXT        NOT NULL DEFAULT 'pending',
    last_error   TEXT,
    created_at   TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at   TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

CREATE INDEX idx_handle_resolution_status_created
    ON handle_resolution_queue (status, created_at)
    WHERE status IN ('pending', 'processing');

COMMENT ON TABLE handle_resolution_queue IS 'Queue for asynchronous DID handle resolution';

-- -----------------------------------------------------------------------------
-- Constellation enrichment queue
-- -----------------------------------------------------------------------------

CREATE TABLE constellation_enrichment_queue (
    id           BIGSERIAL   PRIMARY KEY,
    did          TEXT        NOT NULL UNIQUE,        -- one enrichment job per DID
    post_uris    JSONB       NOT NULL,               -- array of post URIs for this actor
    attempts     INT         NOT NULL DEFAULT 0,
    max_attempts INT         NOT NULL DEFAULT 3,
    status       TEXT        NOT NULL DEFAULT 'pending',
    last_error   TEXT,
    created_at   TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at   TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

CREATE INDEX idx_constellation_status_created
    ON constellation_enrichment_queue (status, created_at)
    WHERE status IN ('pending', 'processing');

COMMENT ON TABLE constellation_enrichment_queue IS 'Queue for asynchronous Constellation API enrichment';

-6

migrations/2025-11-10-014835_add_missing_fk_indexes/down.sql

··· 1 - -- Remove indexes added for foreign key performance 2 - 3 - DROP INDEX IF EXISTS idx_likes_via_post; 4 - DROP INDEX IF EXISTS idx_reposts_via_post; 5 - DROP INDEX IF EXISTS idx_profiles_pinned_post; 6 - DROP INDEX IF EXISTS idx_post_facets_link_uri;

-28

migrations/2025-11-10-014835_add_missing_fk_indexes/up.sql

··· 1 - -- Add missing indexes on foreign key columns 2 - -- 3 - -- These indexes are critical for performance when deleting posts or other referenced records. 4 - -- Without them, DELETE CASCADE operations require full table scans. 5 - -- 6 - -- IMPORTANT: For production databases, run these with CONCURRENTLY to avoid locking: 7 - -- CREATE INDEX CONCURRENTLY idx_likes_via_post ON likes(via_post_id); 8 - -- CREATE INDEX CONCURRENTLY idx_reposts_via_post ON reposts(via_post_id); 9 - -- CREATE INDEX CONCURRENTLY idx_profiles_pinned_post ON profiles(pinned_post_id) WHERE pinned_post_id IS NOT NULL; 10 - -- CREATE INDEX CONCURRENTLY idx_post_facets_link_uri ON post_facets(link_uri_id); 11 - 12 - -- CRITICAL: Improves post deletion performance 13 - -- Without this, deleting a post scans all 949K+ likes rows 14 - CREATE INDEX IF NOT EXISTS idx_likes_via_post ON likes(via_post_id); 15 - 16 - -- CRITICAL: Improves post deletion performance 17 - -- Without this, deleting a post scans all 61K+ reposts rows 18 - CREATE INDEX IF NOT EXISTS idx_reposts_via_post ON reposts(via_post_id); 19 - 20 - -- HIGH PRIORITY: Improves post deletion performance for pinned posts 21 - -- Partial index only covers ~70 profiles with pinned posts (out of 381 total) 22 - CREATE INDEX IF NOT EXISTS idx_profiles_pinned_post 23 - ON profiles(pinned_post_id) 24 - WHERE pinned_post_id IS NOT NULL; 25 - 26 - -- MEDIUM PRIORITY: Improves URI deletion performance 27 - -- Covers 9,451 rows with link facets 28 - CREATE INDEX IF NOT EXISTS idx_post_facets_link_uri ON post_facets(link_uri_id);

-3

migrations/2025-11-11-055323_notifications_table/down.sql

··· 1 - -- Drop notifications tables 2 - DROP TABLE IF EXISTS notification_state; 3 - DROP TABLE IF EXISTS notifications;

-30

migrations/2025-11-11-055323_notifications_table/up.sql

··· 1 - -- Create notifications table to replace Redis sorted sets 2 - CREATE TABLE notifications ( 3 - id BIGSERIAL PRIMARY KEY, 4 - recipient_did TEXT NOT NULL, 5 - author_did TEXT NOT NULL, 6 - uri TEXT NOT NULL UNIQUE, -- For deduplication (e.g., at://did/app.bsky.feed.like/rkey) 7 - cid TEXT NOT NULL, 8 - reason TEXT NOT NULL, -- like, repost, follow, reply, mention, quote 9 - reason_subject TEXT, -- Optional subject URI (e.g., the post that was liked) 10 - indexed_at TIMESTAMPTZ NOT NULL DEFAULT NOW() 11 - ); 12 - 13 - -- Index for listing notifications by recipient (pagination) 14 - CREATE INDEX idx_notifications_recipient_time 15 - ON notifications(recipient_did, indexed_at DESC); 16 - 17 - -- Index for deletion by URI 18 - CREATE INDEX idx_notifications_uri 19 - ON notifications(uri); 20 - 21 - -- Index for cleanup worker (delete old notifications) 22 - CREATE INDEX idx_notifications_indexed_at 23 - ON notifications(indexed_at); 24 - 25 - -- Per-user notification state 26 - CREATE TABLE notification_state ( 27 - did TEXT PRIMARY KEY, 28 - seen_at TIMESTAMPTZ, 29 - unread_count INTEGER DEFAULT 0 30 - );

-2

migrations/2025-11-11-055341_fetch_queue_table/down.sql

··· 1 - -- Drop fetch queue table 2 - DROP TABLE IF EXISTS fetch_queue;

-16

migrations/2025-11-11-055341_fetch_queue_table/up.sql

··· 1 - -- Create fetch queue table to replace Redis list-based queue 2 - CREATE TABLE fetch_queue ( 3 - id BIGSERIAL PRIMARY KEY, 4 - at_uri TEXT NOT NULL UNIQUE, 5 - attempts INTEGER NOT NULL DEFAULT 0, 6 - max_attempts INTEGER NOT NULL DEFAULT 3, 7 - status TEXT NOT NULL DEFAULT 'pending', -- pending, processing, failed 8 - last_error TEXT, 9 - created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), 10 - updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() 11 - ); 12 - 13 - -- Index for queue processing (dequeue operations) 14 - CREATE INDEX idx_fetch_queue_status_created 15 - ON fetch_queue(status, created_at) 16 - WHERE status IN ('pending', 'processing');

-2

migrations/2025-11-11-055354_backfill_jobs_table/down.sql

··· 1 - -- Drop backfill jobs table 2 - DROP TABLE IF EXISTS backfill_jobs;

-22

migrations/2025-11-11-055354_backfill_jobs_table/up.sql

··· 1 - -- Create backfill jobs table to replace Redis job management 2 - CREATE TABLE backfill_jobs ( 3 - did TEXT PRIMARY KEY, 4 - status TEXT NOT NULL DEFAULT 'pending', 5 - -- Status values: pending, processing, successful, failed.retry, failed.permanent 6 - attempts INTEGER NOT NULL DEFAULT 0, 7 - max_attempts INTEGER NOT NULL DEFAULT 3, 8 - last_error TEXT, 9 - scheduled_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), -- For exponential backoff 10 - started_at TIMESTAMPTZ, 11 - completed_at TIMESTAMPTZ 12 - ); 13 - 14 - -- Index for queue processing (dequeue operations) 15 - CREATE INDEX idx_backfill_status_scheduled 16 - ON backfill_jobs(status, scheduled_at) 17 - WHERE status IN ('pending', 'failed.retry'); 18 - 19 - -- Index for detecting stale processing jobs 20 - CREATE INDEX idx_backfill_processing_stale 21 - ON backfill_jobs(status, started_at) 22 - WHERE status = 'processing';

-2

migrations/2025-11-11-055409_jetstream_cursors_table/down.sql

··· 1 - -- Drop jetstream cursors table 2 - DROP TABLE IF EXISTS jetstream_cursors;

-6

migrations/2025-11-11-055409_jetstream_cursors_table/up.sql

··· 1 - -- Create jetstream cursors table to replace Redis cursor storage 2 - CREATE TABLE jetstream_cursors ( 3 - partition TEXT PRIMARY KEY, -- e.g., "posts", "likes", "social" 4 - cursor_value BIGINT NOT NULL, -- Timestamp in microseconds 5 - updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW() 6 - );

-1

migrations/2025-11-11-135750_handle_resolution_queue/down.sql

··· 1 - DROP TABLE IF EXISTS handle_resolution_queue;

-16

migrations/2025-11-11-135750_handle_resolution_queue/up.sql

··· 1 - CREATE TABLE handle_resolution_queue ( 2 - id BIGSERIAL PRIMARY KEY, 3 - did TEXT NOT NULL UNIQUE, 4 - attempts INTEGER NOT NULL DEFAULT 0, 5 - max_attempts INTEGER NOT NULL DEFAULT 3, 6 - status TEXT NOT NULL DEFAULT 'pending', 7 - last_error TEXT, 8 - created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), 9 - updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW() 10 - ); 11 - 12 - CREATE INDEX idx_handle_resolution_status_created 13 - ON handle_resolution_queue (status, created_at) 14 - WHERE status IN ('pending', 'processing'); 15 - 16 - COMMENT ON TABLE handle_resolution_queue IS 'Queue for asynchronous DID handle resolution';

-1

migrations/2025-11-11-135751_constellation_enrichment_queue/down.sql

··· 1 - DROP TABLE IF EXISTS constellation_enrichment_queue;

-18

migrations/2025-11-11-135751_constellation_enrichment_queue/up.sql

··· 1 - CREATE TABLE constellation_enrichment_queue ( 2 - id BIGSERIAL PRIMARY KEY, 3 - did TEXT NOT NULL, 4 - post_uris JSONB NOT NULL, -- Array of post URIs for this actor 5 - attempts INTEGER NOT NULL DEFAULT 0, 6 - max_attempts INTEGER NOT NULL DEFAULT 3, 7 - status TEXT NOT NULL DEFAULT 'pending', 8 - last_error TEXT, 9 - created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), 10 - updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(), 11 - UNIQUE(did) -- One enrichment job per DID 12 - ); 13 - 14 - CREATE INDEX idx_constellation_status_created 15 - ON constellation_enrichment_queue (status, created_at) 16 - WHERE status IN ('pending', 'processing'); 17 - 18 - COMMENT ON TABLE constellation_enrichment_queue IS 'Queue for asynchronous Constellation API enrichment';

+5 -3

migrations/2025-11-11-214925_add_aggregate_stats_table/down.sql migrations/2025-11-01-114848_aggregate_stats/down.sql

-- Rollback for the aggregate statistics migration.
--
-- The stats tables are removed before the enum types their
-- stat_type columns are declared with.
DROP TABLE IF EXISTS
    actor_aggregate_stats,
    post_aggregate_stats;

-- Stat-type enums created by up.sql.
DROP TYPE IF EXISTS
    actor_stat_type,
    post_stat_type;

+20 -1

migrations/2025-11-11-214925_add_aggregate_stats_table/up.sql migrations/2025-11-01-114848_aggregate_stats/up.sql

··· 1 - -- Create enums for stat types 1 + -- ============================================================================= 2 + -- AGGREGATE STATISTICS 3 + -- ============================================================================= 4 + -- 5 + -- Post and actor aggregate statistics with automatic cleanup 6 + -- 7 + -- ============================================================================= 8 + 9 + -- ============================================================================= 10 + -- STAT TYPE ENUMS 11 + -- ============================================================================= 12 + 2 13 -- Post-specific stats 3 14 CREATE TYPE post_stat_type AS ENUM ( 4 15 'like', -- 3: LIKE ··· 17 28 'profile_starterpack' -- 10: PROFILE_STARTERPACK 18 29 ); 19 30 31 + -- ============================================================================= 32 + -- AGGREGATE STATS TABLES 33 + -- ============================================================================= 34 + 20 35 -- Post aggregate statistics 21 36 -- Stores engagement metrics for posts with automatic cleanup 22 37 CREATE TABLE post_aggregate_stats ( ··· 38 53 39 54 PRIMARY KEY (actor_id, stat_type) 40 55 ); 56 + 57 + -- ============================================================================= 58 + -- INDEXES 59 + -- ============================================================================= 41 60 42 61 -- Index for finding hot posts (recently updated engagement) 43 62 CREATE INDEX idx_post_aggregate_stats_updated_at

-17

migrations/2025-11-12-030603_fix_label_default_setting_enum/down.sql

··· 1 - -- PostgreSQL does not support removing enum values directly. 2 - -- To rollback this migration, you would need to: 3 - -- 1. Ensure no rows use 'inform' or 'show' values 4 - -- 2. Drop and recreate the enum type (requires dropping dependent columns first) 5 - -- 3. Recreate all tables/columns that use this type 6 - -- 7 - -- Since this is impractical and the new values are required for compatibility 8 - -- with real production labelers, rollback is not supported. 9 - 10 - -- If you absolutely need to rollback, manually run: 11 - -- BEGIN; 12 - -- -- Delete any rows using the new values 13 - -- DELETE FROM labeler_defs WHERE default_setting IN ('inform', 'show'); 14 - -- -- Then drop and recreate the type (complex, requires recreating dependent tables) 15 - -- ROLLBACK; -- Don't actually do this without a full backup! 16 - 17 - SELECT 1; -- No-op to make diesel happy

-6

migrations/2025-11-12-030603_fix_label_default_setting_enum/up.sql

··· 1 - -- Add missing enum values to label_default_setting 2 - -- These values are used by real labelers in production (e.g., Khronos uses 'inform', Bluesky Moderation uses 'show') 3 - -- even though they're not in the AT Protocol spec's "knownValues" 4 - 5 - ALTER TYPE label_default_setting ADD VALUE IF NOT EXISTS 'inform'; 6 - ALTER TYPE label_default_setting ADD VALUE IF NOT EXISTS 'show';

-4

migrations/2025-11-13-091852_drop_post_content_version/down.sql

··· 1 - -- Restore the content_version column 2 - -- Note: This is not truly reversible as we've lost the original version data 3 - -- The column will be restored but values will be NULL 4 - ALTER TABLE posts ADD COLUMN content_version smallint;

-4

migrations/2025-11-13-091852_drop_post_content_version/up.sql

··· 1 - -- Drop the content_version column from posts table 2 - -- This column is no longer needed as we now use a single-byte 3 - -- dictionary version prefix in the compressed content itself 4 - ALTER TABLE posts DROP COLUMN content_version;

-40

migrations/2025-11-13-122954_expand_language_codes_and_array_support/down.sql

··· 1 - -- ============================================================================ 2 - -- MIGRATION ROLLBACK: Revert language codes expansion and array conversion 3 - -- ============================================================================ 4 - -- This rollback: 5 - -- 1. Drops GIN index on langs 6 - -- 2. Drops max-3 constraint 7 - -- 3. Converts langs array back to single language_code 8 - -- 4. Renames column back to language_code 9 - -- 5. Does NOT remove enum values (PostgreSQL doesn't support removing enum values) 10 - -- 11 - -- WARNING: This will lose language data if posts have multiple languages! 12 - -- Only the first language will be preserved. 13 - 14 - -- Step 1: Drop GIN index 15 - DROP INDEX IF EXISTS idx_posts_langs; 16 - 17 - -- Step 2: Drop max-3 constraint 18 - ALTER TABLE posts 19 - DROP CONSTRAINT IF EXISTS posts_langs_max_3; 20 - 21 - -- Step 3: Rename column back 22 - ALTER TABLE posts 23 - RENAME COLUMN langs TO language_code; 24 - 25 - -- Step 4: Convert array back to single enum 26 - -- WARNING: Data loss! Only keeps first language from array 27 - ALTER TABLE posts 28 - ALTER COLUMN language_code TYPE language_code 29 - USING CASE 30 - WHEN array_length(language_code, 1) > 0 THEN language_code[1] 31 - ELSE NULL 32 - END; 33 - 34 - -- Step 5: Remove comment 35 - COMMENT ON COLUMN posts.language_code IS NULL; 36 - 37 - -- Note: Cannot remove enum values from PostgreSQL enum type 38 - -- The following values will remain in the enum but won't be used: 39 - -- id, th, vi, cs, ro, el, he, bn, ca, fa, sk, bg, hr, lt, lv, sl, et, ga, cy, is, mt, eu, ms, tl, ur, sr, hu, la 40 - -- This is a PostgreSQL limitation - enum values cannot be removed once added

-97

migrations/2025-11-13-122954_expand_language_codes_and_array_support/up.sql

-- ============================================================================
-- MIGRATION: Expand language codes and convert to array support
-- ============================================================================
-- This migration:
--   1. Ensures 37 additional language codes exist in the language_code enum
--      (id, th, vi, cs, ro, el, he, bn, ca, fa, etc.). Each statement uses
--      ADD VALUE IF NOT EXISTS, so codes that are already present are skipped.
--   2. Converts posts.language_code from single enum to language_code[] array
--   3. Preserves existing data during conversion
--   4. Adds constraints for AT Protocol spec (max 3 languages)
--
-- AT Protocol spec: Posts can have up to 3 language codes in langs field
-- Previous implementation only stored first language, this fixes data loss

-- Step 1: Add all new language codes to the enum
-- Note: ALTER TYPE ... ADD VALUE adds one value per statement. PostgreSQL
-- releases before 12 reject it inside a transaction block; 12+ allows it, but
-- a newly added value cannot be used until the transaction commits. Step 2
-- only wraps values already stored in the column, so it does not need them.
-- Statement order matters: each value is appended to the end of the enum.
-- Based on:
--   - W3Techs March 2025: Top 40 internet content languages
--   - Ethnologue 2025: All languages with 50M+ total speakers
--   - Ethnologue 2025: All languages with 50M+ native speakers
-- Total: 37 ADD VALUE statements below

-- Major Asian Languages (12 codes)
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'id'; -- Indonesian (252M total speakers, 1.1% web)
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'th'; -- Thai (71M total speakers, 0.3% web)
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'vi'; -- Vietnamese (97M total speakers, 1.1% web)
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'bn'; -- Bengali (284M total speakers)
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'ur'; -- Urdu (246M total speakers)
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'mr'; -- Marathi (99M total speakers)
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'te'; -- Telugu (96M total speakers)
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'ta'; -- Tamil (86M total speakers)
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'jv'; -- Javanese (69M total speakers)
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'gu'; -- Gujarati (62M total speakers)
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'kn'; -- Kannada (59M total speakers)
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'pa'; -- Punjabi (90M total speakers)

-- European Languages (18 codes)
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'cs'; -- Czech (1.0% web)
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'ro'; -- Romanian (0.5% web)
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'el'; -- Greek (0.5% web)
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'sk'; -- Slovak (0.4% web)
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'bg'; -- Bulgarian (0.3% web)
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'hr'; -- Croatian (0.2% web)
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'lt'; -- Lithuanian (0.2% web)
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'sl'; -- Slovenian (0.1% web)
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'lv'; -- Latvian (0.1% web)
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'et'; -- Estonian (0.1% web)
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'sr'; -- Serbian (0.2% web)
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'bs'; -- Bosnian (0.1% web)
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'ca'; -- Catalan (0.1% web)
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'eu'; -- Basque
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'ga'; -- Irish
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'cy'; -- Welsh
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'is'; -- Icelandic
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'mt'; -- Maltese

-- Middle Eastern & North African (3 codes)
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'fa'; -- Persian/Farsi (83M total speakers, 1.2% web)
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'he'; -- Hebrew (0.4% web)
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'am'; -- Amharic (60M total speakers)

-- African Languages (2 codes)
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'sw'; -- Swahili (87M total speakers)
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'ha'; -- Hausa (94M total speakers)

-- Southeast Asian (2 codes)
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'tl'; -- Tagalog/Filipino (87M total speakers)
ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'ms'; -- Malay

-- Step 2: Convert posts.language_code from single enum to array
-- This preserves existing data by converting NULL -> empty array, single value -> single-element array
-- (the CASE is required: ARRAY[NULL] would be a one-element array, not an empty one)

ALTER TABLE posts
    ALTER COLUMN language_code TYPE language_code[]
    USING CASE
        WHEN language_code IS NOT NULL THEN ARRAY[language_code]
        ELSE ARRAY[]::language_code[]
    END;

-- Step 3: Rename column to match AT Protocol naming (langs)
ALTER TABLE posts
    RENAME COLUMN language_code TO langs;

-- Step 3.5: Set NOT NULL and default (empty array means no language specified)
-- Safe after Step 2, which replaced every NULL with an empty array.
ALTER TABLE posts
    ALTER COLUMN langs SET NOT NULL,
    ALTER COLUMN langs SET DEFAULT ARRAY[]::language_code[];

-- Step 4: Add constraint for AT Protocol spec (max 3 languages per post)
-- array_length() returns NULL for an empty array, hence the IS NULL branch.
ALTER TABLE posts
    ADD CONSTRAINT posts_langs_max_3
    CHECK (array_length(langs, 1) IS NULL OR array_length(langs, 1) <= 3);

-- Step 5: Add GIN index for efficient language searching
-- This allows queries like "find all posts in Japanese OR English"
CREATE INDEX idx_posts_langs ON posts USING GIN (langs);

-- Step 6: Update comment on column
COMMENT ON COLUMN posts.langs IS 'Languages of post content (ISO 639-1 codes). Max 3 per AT Protocol spec. Empty array means no language specified.';

-14

migrations/2025-11-13-213616_enable_postgresql_extensions/down.sql

-- Down migration: remove the extensions installed by the matching up.sql.
--
-- Ordering: dependents go first. pgrouting depends on postgis, so pgrouting
-- is removed before postgis.

-- 1. pgRouting (graph analysis for social networks)
DROP EXTENSION IF EXISTS pgrouting;

-- 2. PostGIS (only needed as the pgRouting prerequisite; safe once pgrouting is gone)
DROP EXTENSION IF EXISTS postgis;

-- 3. pg_stat_statements (query performance monitoring)
--    Dropping it discards all accumulated query statistics.
DROP EXTENSION IF EXISTS pg_stat_statements;

-30

migrations/2025-11-13-213616_enable_postgresql_extensions/up.sql

-- Up migration: install the PostgreSQL extensions Parakeet relies on.
-- Every statement is idempotent (IF NOT EXISTS).

-- pg_stat_statements: query performance monitoring and optimization.
-- Records execution statistics for all SQL statements so slow queries and
-- optimization opportunities can be identified. It is configured in
-- postgresql.conf via shared_preload_libraries.
CREATE EXTENSION IF NOT EXISTS pg_stat_statements;

-- PostGIS: geospatial extension. Parakeet does not use geospatial features
-- directly; it is installed because pgRouting requires it.
CREATE EXTENSION IF NOT EXISTS postgis;

-- pgRouting: graph algorithms for social network analysis on the follows
-- table, including:
--   - shortest path between users (connection discovery)
--   - community detection (strongly connected components)
--   - centrality metrics (influential users, key connectors)
--   - multi-hop neighborhood queries (N-degree connections)
CREATE EXTENSION IF NOT EXISTS pgrouting;

-- Not enabled yet (available via flake.nix):
--
--   timescaledb - time-series optimization for notifications and stats tables.
--     Deferred until a schema migration strategy (hypertables, partitioning)
--     is designed. Future use: trending posts, notification retention
--     policies, time-bucketing.
--
--   pgvector - vector similarity search for semantic/ML-based features.
--     Deferred until a concrete use case emerges (embeddings, semantic
--     search). Future use: content-based recommendations, duplicate detection.

-33

migrations/2025-11-13-221911_refactor_notifications_to_actor_ids/down.sql

-- Down migration: put the DID text columns back on notifications and remove
-- the integer actor-id columns.

-- 1. Re-add the DID columns. They start nullable so they can be backfilled.
ALTER TABLE notifications
    ADD COLUMN recipient_did TEXT,
    ADD COLUMN author_did TEXT;

-- 2. Backfill every row (intentionally no WHERE clause) by looking up the DID
--    for each actor id in the actors table.
UPDATE notifications n
SET
    recipient_did = (SELECT did FROM actors WHERE id = n.recipient_actor_id),
    author_did = (SELECT did FROM actors WHERE id = n.author_actor_id);

-- 3. Enforce NOT NULL now that the columns are populated.
ALTER TABLE notifications
    ALTER COLUMN recipient_did SET NOT NULL,
    ALTER COLUMN author_did SET NOT NULL;

-- 4. Remove the foreign keys that pointed at actors(id).
ALTER TABLE notifications
    DROP CONSTRAINT IF EXISTS fk_notifications_recipient,
    DROP CONSTRAINT IF EXISTS fk_notifications_author;

-- 5. Remove the actor-id columns themselves.
ALTER TABLE notifications
    DROP COLUMN recipient_actor_id,
    DROP COLUMN author_actor_id;

-- 6. Swap the indexes back: drop the actor-id based ones (IF EXISTS keeps
--    this harmless if they are already gone) and recreate the DID based one.
DROP INDEX IF EXISTS idx_notifications_recipient_actor_time;
DROP INDEX IF EXISTS idx_notifications_author_actor;

CREATE INDEX idx_notifications_recipient_time
    ON notifications(recipient_did, indexed_at DESC);

-42

migrations/2025-11-13-221911_refactor_notifications_to_actor_ids/up.sql

-- Up migration: replace the DID text columns on notifications with integer
-- actor ids, for performance and consistency with other tables.

-- 1. Add the actor-id columns. They start nullable so existing rows can be
--    backfilled before NOT NULL is enforced.
ALTER TABLE notifications
    ADD COLUMN recipient_actor_id INT,
    ADD COLUMN author_actor_id INT;

-- 2. Backfill every row (intentionally no WHERE clause) by resolving each DID
--    through the actors table. A DID with no actors row yields NULL here,
--    which makes step 3 fail.
UPDATE notifications n
SET
    recipient_actor_id = (SELECT id FROM actors WHERE did = n.recipient_did),
    author_actor_id = (SELECT id FROM actors WHERE did = n.author_did);

-- 3. Enforce NOT NULL on the populated columns.
ALTER TABLE notifications
    ALTER COLUMN recipient_actor_id SET NOT NULL,
    ALTER COLUMN author_actor_id SET NOT NULL;

-- 4. Remove the DID columns that the actor ids replace.
ALTER TABLE notifications
    DROP COLUMN recipient_did,
    DROP COLUMN author_did;

-- 5. Tie both actor ids to actors(id); deleting an actor deletes its
--    notifications.
ALTER TABLE notifications
    ADD CONSTRAINT fk_notifications_recipient
        FOREIGN KEY (recipient_actor_id) REFERENCES actors(id) ON DELETE CASCADE,
    ADD CONSTRAINT fk_notifications_author
        FOREIGN KEY (author_actor_id) REFERENCES actors(id) ON DELETE CASCADE;

-- 6. Recipient lookups are the most common query. The previous index covered
--    (recipient_did, indexed_at DESC); its replacement covers
--    (recipient_actor_id, indexed_at DESC).
DROP INDEX IF EXISTS idx_notifications_recipient_time;

CREATE INDEX idx_notifications_recipient_actor_time
    ON notifications(recipient_actor_id, indexed_at DESC);

-- 7. Index author lookups as well, for completeness.
CREATE INDEX idx_notifications_author_actor
    ON notifications(author_actor_id);

-3

migrations/2025-11-14-012931_create_notification_enums/down.sql

-- Down migration: remove the notification enum types, in the reverse of the
-- order in which up.sql created them.
DROP TYPE IF EXISTS notification_record_type;

DROP TYPE IF EXISTS notification_reason;

-19

migrations/2025-11-14-012931_create_notification_enums/up.sql

-- notification_reason: why a notification was created.
CREATE TYPE notification_reason AS ENUM (
    'like',     -- a user liked a post
    'repost',   -- a user reposted a post
    'follow',   -- a user followed someone
    'reply',    -- a user replied to a post
    'mention',  -- a user mentioned someone in a post
    'quote'     -- a user quoted a post
);

-- notification_record_type: the kind of record that triggered the
-- notification.
CREATE TYPE notification_record_type AS ENUM (
    'like',     -- app.bsky.feed.like
    'repost',   -- app.bsky.feed.repost
    'follow',   -- app.bsky.graph.follow
    'post'      -- app.bsky.feed.post (used for reply/mention/quote)
);

-12

migrations/2025-11-14-013020_add_normalized_notification_columns/down.sql

-- Down migration: remove the normalized notification columns, undoing up.sql
-- in reverse order (index first, then the columns last-added-first).

DROP INDEX IF EXISTS idx_notifications_subject_actor;

ALTER TABLE notifications
    DROP COLUMN IF EXISTS subject_rkey,
    DROP COLUMN IF EXISTS subject_record_type,
    DROP COLUMN IF EXISTS subject_actor_id,
    DROP COLUMN IF EXISTS record_cid_new,
    DROP COLUMN IF EXISTS record_rkey,
    DROP COLUMN IF EXISTS record_type,
    DROP COLUMN IF EXISTS reason_enum;

-24

migrations/2025-11-14-013020_add_normalized_notification_columns/up.sql

-- Up migration: add structured columns to notifications that will replace the
-- uri and reason_subject text columns.
--
-- All columns are nullable at this stage. A later migration populates them
-- and a finalization migration then makes them NOT NULL where appropriate.
ALTER TABLE notifications
    -- Typed replacement for the TEXT reason column.
    ADD COLUMN reason_enum notification_reason,

    -- Record fields, replacing uri (at://did/collection/rkey).
    ADD COLUMN record_type notification_record_type,
    ADD COLUMN record_rkey TEXT,
    -- Temporary name, chosen to avoid clashing with the existing cid column.
    ADD COLUMN record_cid_new TEXT,

    -- Subject fields, replacing reason_subject (at://did/collection/rkey).
    ADD COLUMN subject_actor_id INT REFERENCES actors(id) ON DELETE CASCADE,
    ADD COLUMN subject_record_type notification_record_type,
    ADD COLUMN subject_rkey TEXT;

-- Efficient lookups by subject actor.
CREATE INDEX idx_notifications_subject_actor ON notifications(subject_actor_id);

-12

migrations/2025-11-14-013103_populate_normalized_notification_columns/down.sql

-- Down migration: clear every normalized column again so the population step
-- in up.sql can be re-run if needed.
--
-- The missing WHERE clause is intentional: all rows are reset.
UPDATE notifications
SET
    reason_enum         = NULL,
    record_type         = NULL,
    record_rkey         = NULL,
    record_cid_new      = NULL,
    subject_actor_id    = NULL,
    subject_record_type = NULL,
    subject_rkey        = NULL;

-59

migrations/2025-11-14-013103_populate_normalized_notification_columns/up.sql

-- Populate normalized notification columns from existing URI data
--
-- This migration parses AT-URIs and populates the new structured columns.
-- AT-URI format: at://did:plc:xxx/collection/rkey
--
-- The work is done in two UPDATE statements rather than one per column, so
-- each row is rewritten at most twice. No SET expression reads a column that
-- the same statement writes, so the results match a column-by-column
-- population.

-- Pass 1: reason and record fields, for every row (intentionally no WHERE).
UPDATE notifications
SET
    -- Cast the existing TEXT reason to the enum; an unknown reason value
    -- aborts the migration.
    reason_enum = reason::notification_reason,

    -- rkey is the last path component of the uri (everything after the final /).
    record_rkey = substring(uri from '[^/]+$'),

    -- Copy the CID into the temporary column.
    record_cid_new = cid,

    -- Derive the record type from the collection segment of the uri.
    record_type = CASE
        WHEN uri LIKE '%/app.bsky.feed.like/%' THEN 'like'::notification_record_type
        WHEN uri LIKE '%/app.bsky.feed.repost/%' THEN 'repost'::notification_record_type
        WHEN uri LIKE '%/app.bsky.graph.follow/%' THEN 'follow'::notification_record_type
        WHEN uri LIKE '%/app.bsky.feed.post/%' THEN 'post'::notification_record_type
        ELSE NULL
    END;

-- Pass 2: subject fields, only for rows that have a reason_subject.
UPDATE notifications n
SET
    -- rkey is the last path component of reason_subject.
    subject_rkey = substring(n.reason_subject from '[^/]+$'),

    -- Derive the subject record type from the collection segment.
    subject_record_type = CASE
        WHEN n.reason_subject LIKE '%/app.bsky.feed.post/%' THEN 'post'::notification_record_type
        WHEN n.reason_subject LIKE '%/app.bsky.feed.like/%' THEN 'like'::notification_record_type
        WHEN n.reason_subject LIKE '%/app.bsky.feed.repost/%' THEN 'repost'::notification_record_type
        ELSE NULL
    END,

    -- Resolve the DID (the authority part of at://did:plc:xxx/...) to an
    -- actor id. Stays NULL when no actors row has that DID.
    subject_actor_id = (
        SELECT a.id
        FROM actors a
        WHERE a.did = substring(n.reason_subject from 'at://([^/]+)')
    )
WHERE n.reason_subject IS NOT NULL;

-- Verification queries (run these to check data integrity):
-- SELECT COUNT(*) FROM notifications WHERE reason_enum IS NULL; -- Should be 0
-- SELECT COUNT(*) FROM notifications WHERE record_type IS NULL; -- Should be 0
-- SELECT COUNT(*) FROM notifications WHERE record_rkey IS NULL; -- Should be 0
-- SELECT COUNT(*) FROM notifications WHERE reason_subject IS NOT NULL AND subject_actor_id IS NULL; -- Orphaned subjects

-41

migrations/2025-11-14-013210_finalize_notification_normalization/down.sql

-- Reverse finalization by restoring the legacy uri / cid / reason /
-- reason_subject TEXT columns.
--
-- The legacy URIs are rebuilt as at://<did>/<collection>/<rkey> by joining
-- the actors table. A placeholder built only from record_type and record_rkey
-- is not unique across authors (uniqueness was on author_actor_id,
-- record_type, record_rkey), so it would make the unique uri index in step 6
-- fail.
-- This is mainly for development/testing rollback scenarios.

-- Step 1: Drop unique constraint
ALTER TABLE notifications
    DROP CONSTRAINT IF EXISTS notifications_unique_record;

-- Step 2: Rename columns back to temporary names
-- (frees the names reason and cid for the legacy TEXT columns)
ALTER TABLE notifications
    RENAME COLUMN reason TO reason_enum;

ALTER TABLE notifications
    RENAME COLUMN record_cid TO record_cid_new;

-- Step 3: Add back old columns (nullable for now)
ALTER TABLE notifications
    ADD COLUMN uri TEXT,
    ADD COLUMN cid TEXT,
    ADD COLUMN reason TEXT,
    ADD COLUMN reason_subject TEXT;

-- Step 4a: Restore cid, reason and uri.
-- author_actor_id is NOT NULL and references actors(id), so the join matches
-- every row. The collection names mirror the notification_record_type values
-- (like/repost/follow/post).
-- NOTE(review): assumes actors.did is unique; confirm against the actors schema.
UPDATE notifications AS n
SET
    cid = n.record_cid_new,
    reason = n.reason_enum::TEXT,
    uri = 'at://' || a.did || '/'
        || CASE n.record_type
               WHEN 'like' THEN 'app.bsky.feed.like'
               WHEN 'repost' THEN 'app.bsky.feed.repost'
               WHEN 'follow' THEN 'app.bsky.graph.follow'
               WHEN 'post' THEN 'app.bsky.feed.post'
           END
        || '/' || n.record_rkey
FROM actors AS a
WHERE a.id = n.author_actor_id;

-- Step 4b: Restore reason_subject where the subject is fully known.
-- Rows without a subject actor, record type or rkey keep reason_subject NULL.
UPDATE notifications AS n
SET
    reason_subject = 'at://' || a.did || '/'
        || CASE n.subject_record_type
               WHEN 'like' THEN 'app.bsky.feed.like'
               WHEN 'repost' THEN 'app.bsky.feed.repost'
               WHEN 'follow' THEN 'app.bsky.graph.follow'
               WHEN 'post' THEN 'app.bsky.feed.post'
           END
        || '/' || n.subject_rkey
FROM actors AS a
WHERE a.id = n.subject_actor_id
  AND n.subject_record_type IS NOT NULL
  AND n.subject_rkey IS NOT NULL;

-- Step 5: Make old columns NOT NULL (if they were before)
-- Skipped: the original nullability of these columns is not recorded here.

-- Step 6: Recreate uri index
CREATE UNIQUE INDEX notifications_uri_key ON notifications(uri);

-49

migrations/2025-11-14-013210_finalize_notification_normalization/up.sql

-- Up migration: finish the notification normalization. The populated columns
-- become NOT NULL, the legacy text columns are removed, the temporary column
-- names are replaced by their final names, and uniqueness is re-established
-- on the structured key.

-- 1. Required columns must be populated for every row from here on.
ALTER TABLE notifications
    ALTER COLUMN reason_enum SET NOT NULL,
    ALTER COLUMN record_type SET NOT NULL,
    ALTER COLUMN record_rkey SET NOT NULL,
    ALTER COLUMN record_cid_new SET NOT NULL;

-- 2. Remove the legacy columns that the structured ones replace.
ALTER TABLE notifications
    DROP COLUMN uri,
    DROP COLUMN cid,
    DROP COLUMN reason,
    DROP COLUMN reason_subject;

-- 3. The name cid-equivalent is free now: record_cid_new becomes record_cid.
ALTER TABLE notifications
    RENAME COLUMN record_cid_new TO record_cid;

-- 4. The name reason is free now: reason_enum becomes reason.
ALTER TABLE notifications
    RENAME COLUMN reason_enum TO reason;

-- 5. Uniqueness on (author_actor_id, record_type, record_rkey) takes over
--    from the old uri uniqueness constraint.
ALTER TABLE notifications
    ADD CONSTRAINT notifications_unique_record
    UNIQUE (author_actor_id, record_type, record_rkey);

-- 6. Remove the old uri index if it is still around.
DROP INDEX IF EXISTS idx_notifications_uri;

-- Resulting schema:
-- notifications (
--     id                  BIGSERIAL PRIMARY KEY,
--     recipient_actor_id  INT NOT NULL,
--     author_actor_id     INT NOT NULL,
--     indexed_at          TIMESTAMPTZ NOT NULL,
--     reason              notification_reason NOT NULL,
--     record_type         notification_record_type NOT NULL,
--     record_rkey         TEXT NOT NULL,
--     record_cid          TEXT NOT NULL,
--     subject_actor_id    INT (nullable),
--     subject_record_type notification_record_type (nullable),
--     subject_rkey        TEXT (nullable),
--     UNIQUE (author_actor_id, record_type, record_rkey)
-- )

-24

migrations/2025-11-14-015525_normalize_notification_state/down.sql

-- Down migration: key notification_state by DID (text) again instead of
-- actor_id (integer).

-- 1. Re-add the DID column, nullable until it is backfilled.
ALTER TABLE notification_state
    ADD COLUMN did TEXT;

-- 2. Backfill the DID of each row from its actor.
UPDATE notification_state ns
SET did = a.did
FROM actors a
WHERE a.id = ns.actor_id;

-- 3. Enforce NOT NULL; this fails if any row was left without a DID.
ALTER TABLE notification_state
    ALTER COLUMN did SET NOT NULL;

-- 4. Remove the actor_id primary key together with the column.
ALTER TABLE notification_state
    DROP CONSTRAINT notification_state_pkey,
    DROP COLUMN actor_id;

-- 5. Make did the primary key again.
ALTER TABLE notification_state
    ADD PRIMARY KEY (did);

-25

migrations/2025-11-14-015525_normalize_notification_state/up.sql

-- Up migration: key notification_state by actor_id (integer) instead of DID
-- (text). The primary key moves from did to actor_id.

-- 1. Add actor_id, referencing actors(id); deleting an actor deletes its
--    notification state.
ALTER TABLE notification_state
    ADD COLUMN actor_id INT REFERENCES actors(id) ON DELETE CASCADE;

-- 2. Backfill actor_id by matching each row's DID against actors.
UPDATE notification_state ns
SET actor_id = a.id
FROM actors a
WHERE a.did = ns.did;

-- 3. Enforce NOT NULL; this fails if any DID could not be resolved.
ALTER TABLE notification_state
    ALTER COLUMN actor_id SET NOT NULL;

-- 4. Remove the DID primary key together with the column.
ALTER TABLE notification_state
    DROP CONSTRAINT notification_state_pkey,
    DROP COLUMN did;

-- 5. Make actor_id the primary key.
ALTER TABLE notification_state
    ADD PRIMARY KEY (actor_id);

-59

migrations/2025-11-15-030105_optimize_notification_storage/down.sql

-- Reverse optimization of notification storage
-- Convert record_rkey / subject_rkey (INT8) and record_cid (BYTEA) back to TEXT
--
-- WARNING: This migration truncates the notifications table, mirroring the
-- up migration, which already treats notifications as regenerable.
--
-- Why the data is not converted in place:
--   - PostgreSQL's encode() has no 'base32' format, so the CID text form
--     cannot be produced in plain SQL.
--   - A bytea literal in hex format is written as one \x prefix followed by
--     hex digits ('\x01711220'); repeating \x per byte is rejected as
--     invalid input.
--   - NOTE(review): no SQL function i64_to_tid() is defined in the migrations
--     visible here; confirm whether one exists before relying on it.

-- Step 1: Truncate existing notifications (they will be regenerated)
TRUNCATE TABLE notifications;

-- Step 2: Drop optimized columns
-- Dropping record_rkey also drops the unique constraint that includes it.
ALTER TABLE notifications
    DROP COLUMN record_rkey,
    DROP COLUMN record_cid,
    DROP COLUMN subject_rkey;

-- Step 3: Add back TEXT columns under the original names
-- NOT NULL can be applied directly because the table is empty.
ALTER TABLE notifications
    ADD COLUMN record_rkey TEXT NOT NULL,
    ADD COLUMN record_cid TEXT NOT NULL,
    ADD COLUMN subject_rkey TEXT;

-- Step 4: Recreate unique constraint with TEXT columns
ALTER TABLE notifications
    DROP CONSTRAINT IF EXISTS notifications_unique_record;
ALTER TABLE notifications
    ADD CONSTRAINT notifications_unique_record
    UNIQUE (author_actor_id, record_type, record_rkey);

-- The re-created columns carry no column comments, so there is nothing to
-- clear with COMMENT ON COLUMN ... IS NULL.

-49

migrations/2025-11-15-030105_optimize_notification_storage/up.sql

-- Up migration: store notification rkeys as INT8 (TIDs) and CIDs as BYTEA
-- (digests), matching the pattern used by the posts, likes, reposts and
-- follows tables.
-- Storage savings: TID TEXT→INT8 (38% smaller), CID TEXT→BYTEA (46% smaller)
--
-- WARNING: the notifications table is emptied by this migration.
-- Notifications are regenerated naturally as new activity occurs.

-- 1. Empty the table; existing rows are not converted.
TRUNCATE TABLE notifications;

-- 2. Remove the TEXT columns.
ALTER TABLE notifications
    DROP COLUMN record_rkey,
    DROP COLUMN record_cid,
    DROP COLUMN subject_rkey;

-- 3. Add the compact replacements under the same names.
ALTER TABLE notifications
    ADD COLUMN record_rkey INT8 NOT NULL,
    ADD COLUMN record_cid BYTEA NOT NULL,
    ADD COLUMN subject_rkey INT8;

-- 4. Re-establish uniqueness on the new column types.
ALTER TABLE notifications
    DROP CONSTRAINT IF EXISTS notifications_unique_record;

ALTER TABLE notifications
    ADD CONSTRAINT notifications_unique_record
    UNIQUE (author_actor_id, record_type, record_rkey);

-- 5. Column documentation.
COMMENT ON COLUMN notifications.record_rkey IS 'TID as INT8 (convert to string via i64_to_tid())';
COMMENT ON COLUMN notifications.record_cid IS '32-byte CID digest (header stripped, reconstruct via digest_to_record_cid())';
COMMENT ON COLUMN notifications.subject_rkey IS 'Optional TID as INT8 for subject record';

-- Resulting schema:
-- notifications (
--     id                  BIGSERIAL PRIMARY KEY,
--     recipient_actor_id  INT NOT NULL REFERENCES actors(id),
--     author_actor_id     INT NOT NULL REFERENCES actors(id),
--     indexed_at          TIMESTAMPTZ NOT NULL,
--     reason              notification_reason NOT NULL,
--     record_type         notification_record_type NOT NULL,
--     record_rkey         INT8 NOT NULL,    -- was TEXT
--     record_cid          BYTEA NOT NULL,   -- was TEXT
--     subject_actor_id    INT REFERENCES actors(id),
--     subject_record_type notification_record_type,
--     subject_rkey        INT8,             -- was TEXT
--     UNIQUE (author_actor_id, record_type, record_rkey)
-- )

+4 -4

parakeet-db/src/schema.rs

··· 417 417 418 418 diesel::table! { 419 419 notification_state (actor_id) { 420 + actor_id -> Int4, 420 421 seen_at -> Nullable<Timestamptz>, 421 422 unread_count -> Nullable<Int4>, 422 - actor_id -> Int4, 423 423 } 424 424 } 425 425 ··· 430 430 431 431 notifications (id) { 432 432 id -> Int8, 433 - indexed_at -> Timestamptz, 434 433 recipient_actor_id -> Int4, 435 434 author_actor_id -> Int4, 435 + indexed_at -> Timestamptz, 436 436 reason -> NotificationReason, 437 437 record_type -> NotificationRecordType, 438 + record_rkey -> Int8, 439 + record_cid -> Bytea, 438 440 subject_actor_id -> Nullable<Int4>, 439 441 subject_record_type -> Nullable<NotificationRecordType>, 440 - record_rkey -> Int8, 441 - record_cid -> Bytea, 442 442 subject_rkey -> Nullable<Int8>, 443 443 } 444 444 }

Configure Feed

Configure Feed