···18181919-- Language Codes (ISO 639-1 + regional variants)
2020-- Includes all language codes from historical database (4.6M posts)
2121+-- Extended with additional codes for broader language coverage (appended at the end of the list)
2122CREATE TYPE language_code AS ENUM (
2223 -- Original 20 codes
2324 'en', 'es', 'fr', 'de', 'it', 'pt', 'ru', 'ja', 'ko', 'zh',
···3940 'zxx', 'cu', 'en-CA', 'es-CL', 'haw', 'en-UK', 'zh-CN', 'iw', 'tt', 'ba',
4041 'ms', 'in', 'mt', 'fr-CA', 'oc', 'sco', 'fy', 'nr', 'kl', 'pa',
4142 'nb-NO', 'oj', 'rm', 'fa-IR', 'da-DK', 'sk-SK', 'ceb', 'su', 'ca-ES', 'mn',
4242- 'ta', 'lg', 'ku', 'tok', 'jp-JP', 'gsw', 'ha', 'wa', 'yue', 'ars'
4343+ 'ta', 'lg', 'ku', 'tok', 'jp-JP', 'gsw', 'ha', 'wa', 'yue', 'ars',
4444+ -- Extended language codes (4 additional codes for broader coverage)
4545+ 'mr', 'te', 'jv', 'kn'
4346);

-- Supported facet kinds: mention, link, or tag.
CREATE TYPE facet_type AS ENUM (
    'mention',
    'link',
    'tag'
);
···116119 rkey INT8 NOT NULL,
117120 cid BYTEA NOT NULL,
118121 content BYTEA,
119119- content_version SMALLINT,
120120- language_code language_code,
122122+ langs language_code[] NOT NULL DEFAULT '{}',
121123 tags TEXT[] NOT NULL DEFAULT '{}',
122124 parent_post_id BIGINT REFERENCES posts(id) ON DELETE SET NULL,
123125 root_post_id BIGINT REFERENCES posts(id) ON DELETE SET NULL,
···127129 status post_status NOT NULL DEFAULT 'complete',
128130 search_vector tsvector,
129131 UNIQUE (actor_id, rkey),
130130- CHECK (
131131- (content IS NULL AND content_version IS NULL) OR
132132- (content IS NOT NULL AND content_version IS NOT NULL)
133133- )
132132+ CHECK (array_length(langs, 1) IS NULL OR array_length(langs, 1) <= 3)
134133);
135134136136-COMMENT ON COLUMN posts.content IS 'Zstd-compressed post content (BYTEA). NULL for stub/deleted posts.';
137137-COMMENT ON COLUMN posts.content_version IS 'Dictionary version (NULL if no content)';
135135+COMMENT ON COLUMN posts.content IS 'Zstd-compressed post content (BYTEA). NULL for stub/deleted posts. Dictionary version stored as first byte prefix.';
-- Stored description for posts.langs. The language_code enum is not limited
-- to two-letter ISO 639-1 codes: it also contains three-letter codes
-- (e.g. 'haw', 'yue') and regional tags (e.g. 'en-CA', 'zh-CN'), so the
-- description refers to the enum rather than to ISO 639-1 alone.
COMMENT ON COLUMN posts.langs IS 'Languages of post content (language_code enum: ISO 639 codes plus regional variants). Max 3 per AT Protocol spec. Empty array means no language specified.';
138137COMMENT ON COLUMN posts.search_vector IS 'Full-text search vector (NULL except for allowlisted users)';
139138COMMENT ON COLUMN posts.status IS 'Post lifecycle state: complete (normal), stub (placeholder, needs fetch), missing (permanently unfetchable), deleted (soft-deleted), forbidden (access denied)';

-- Posts by author.
CREATE INDEX idx_posts_actor
    ON posts (actor_id);

-- Thread navigation. Both indexes are partial: rows whose parent/root
-- reference is NULL are left out.
CREATE INDEX idx_posts_parent
    ON posts (parent_post_id)
    WHERE parent_post_id IS NOT NULL;

CREATE INDEX idx_posts_root
    ON posts (root_post_id)
    WHERE root_post_id IS NOT NULL;

-- GIN indexes over the array columns.
CREATE INDEX idx_posts_langs
    ON posts USING GIN (langs);

CREATE INDEX idx_posts_tags
    ON posts USING GIN (tags);

-- Only rows whose status is something other than 'complete' are indexed.
CREATE INDEX idx_posts_status
    ON posts (status)
    WHERE status <> 'complete';

-- Full-text search; rows without a search vector are skipped.
CREATE INDEX idx_posts_search_vector
    ON posts USING GIN (search_vector)
    WHERE search_vector IS NOT NULL;

-- statuses.embed_post_id references posts; deleting the post clears the
-- reference instead of removing the status row.
ALTER TABLE statuses
    ADD CONSTRAINT fk_statuses_embed_post
    FOREIGN KEY (embed_post_id)
    REFERENCES posts (id)
    ON DELETE SET NULL;

-- Add indexes on foreign keys for improved DELETE performance.
-- Partial: only profiles that actually have a pinned post are indexed.
CREATE INDEX idx_profiles_pinned_post
    ON profiles (pinned_post_id)
    WHERE pinned_post_id IS NOT NULL;
155158156159-- =============================================================================
157160-- POST EMBEDS
···225228);

-- Mention lookups; facets without a mentioned actor are not indexed.
CREATE INDEX idx_post_facets_mention
    ON post_facets (mention_actor_id)
    WHERE mention_actor_id IS NOT NULL;

-- Lookups by linked URI id (full index, no predicate).
CREATE INDEX idx_post_facets_link_uri
    ON post_facets (link_uri_id);
228232229233-- Post Mentions (denormalized for fast lookups)
230234CREATE TABLE post_mentions (
+2
migrations/2025-11-01-114841_engagement/up.sql
-- Secondary indexes on likes.
CREATE INDEX idx_likes_actor
    ON likes (actor_id);

CREATE INDEX idx_likes_subject
    ON likes (subject_type, subject_id);

CREATE INDEX idx_likes_rkey
    ON likes (rkey);

-- Supports lookups of likes by via_post_id.
CREATE INDEX idx_likes_via_post
    ON likes (via_post_id);
35363637-- =============================================================================
3738-- REPOSTS
-- Secondary indexes on reposts.
CREATE INDEX idx_reposts_actor
    ON reposts (actor_id);

CREATE INDEX idx_reposts_post
    ON reposts (post_id);

CREATE INDEX idx_reposts_rkey
    ON reposts (rkey);

-- Supports lookups of reposts by via_post_id.
CREATE INDEX idx_reposts_via_post
    ON reposts (via_post_id);
53555456-- =============================================================================
5557-- BOOKMARKS
-- Drop tables in reverse order
DROP TABLE IF EXISTS
    notification_state,
    notifications,
    notif_decl;
22-DROP TABLE IF EXISTS notification_seens;
33-DROP TABLE IF EXISTS notifications;

-- Drop types
DROP TYPE IF EXISTS
    notification_record_type,
    notification_reason,
    notif_allow_subscriptions;
55-DROP TYPE IF EXISTS notification_reason;
66-DROP TYPE IF EXISTS notification_record_type;
+73-2
migrations/2025-11-01-114842_notifications/up.sql
···11-- =============================================================================
22--- NOTIFICATION PREFERENCES
22+-- NOTIFICATIONS
33-- =============================================================================
44--
55--- Notification preferences only (notifications are stored in Redis)
55+-- Notification system: preferences, notification events, and user state
66+-- Optimized storage with INT8 (TID rkeys) and BYTEA (CID digests)
67--
78-- =============================================================================
891010+-- =============================================================================
1111+-- NOTIFICATION TYPES
1212+-- =============================================================================

-- Values accepted by notif_decl.allow_subscriptions.
CREATE TYPE notif_allow_subscriptions AS ENUM (
    'none',
    'mutuals',
    'followers'
);

-- Why a notification was created.
CREATE TYPE notification_reason AS ENUM (
    'like',     -- a post was liked
    'repost',   -- a post was reposted
    'follow',   -- someone was followed
    'reply',    -- a post was replied to
    'mention',  -- someone was mentioned in a post
    'quote'     -- a post was quoted
);

-- Type of the record that triggered a notification.
CREATE TYPE notification_record_type AS ENUM (
    'like',     -- app.bsky.feed.like
    'repost',   -- app.bsky.feed.repost
    'follow',   -- app.bsky.graph.follow
    'post'      -- app.bsky.feed.post (used for reply/mention/quote)
);
3535+3636+-- =============================================================================
3737+-- NOTIFICATION PREFERENCES
3838+-- =============================================================================

-- Per-actor notification preferences; the row is removed with its actor.
CREATE TABLE notif_decl (
    actor_id            INT PRIMARY KEY
                        REFERENCES actors (id) ON DELETE CASCADE,
    allow_subscriptions notif_allow_subscriptions,  -- nullable
    created_at          TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);
4646+4747+-- =============================================================================
4848+-- NOTIFICATIONS TABLE
4949+-- =============================================================================
5050+5151+-- Notifications table with optimized storage
5252+CREATE TABLE notifications (
5353+ id BIGSERIAL PRIMARY KEY,
5454+ recipient_actor_id INT NOT NULL REFERENCES actors(id) ON DELETE CASCADE,
5555+ author_actor_id INT NOT NULL REFERENCES actors(id) ON DELETE CASCADE,
5656+ indexed_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
5757+ reason notification_reason NOT NULL,
5858+ record_type notification_record_type NOT NULL,
5959+ record_rkey INT8 NOT NULL,
6060+ record_cid BYTEA NOT NULL,
6161+ subject_actor_id INT REFERENCES actors(id) ON DELETE CASCADE,
6262+ subject_record_type notification_record_type,
6363+ subject_rkey INT8,
6464+ UNIQUE (author_actor_id, record_type, record_rkey)
6565+);
6666+6767+COMMENT ON COLUMN notifications.record_rkey IS 'TID as INT8 (convert to string via i64_to_tid())';
6868+COMMENT ON COLUMN notifications.record_cid IS '32-byte CID digest (header stripped, reconstruct via digest_to_record_cid())';
6969+COMMENT ON COLUMN notifications.subject_rkey IS 'Optional TID as INT8 for subject record';

-- Indexes for notifications

-- Listing one recipient's notifications, newest first.
CREATE INDEX idx_notifications_recipient_time
    ON notifications(recipient_actor_id, indexed_at DESC);

-- Time-ordered access across all recipients.
CREATE INDEX idx_notifications_indexed_at
    ON notifications(indexed_at);

-- subject_actor_id references actors(id) ON DELETE CASCADE, but unlike
-- recipient_actor_id and author_actor_id it is not the leading column of any
-- other index on this table, so deleting an actor would have to scan
-- notifications to find dependent rows. Partial because the column is
-- nullable and only non-NULL values can match a referenced actor.
CREATE INDEX idx_notifications_subject_actor
    ON notifications(subject_actor_id)
    WHERE subject_actor_id IS NOT NULL;
7777+7878+-- =============================================================================
7979+-- NOTIFICATION STATE
8080+-- =============================================================================

-- One row of notification state per actor; removed together with the actor.
CREATE TABLE notification_state (
    actor_id     INTEGER PRIMARY KEY REFERENCES actors (id) ON DELETE CASCADE,
    seen_at      TIMESTAMP WITH TIME ZONE,
    -- NOTE(review): nullable even though it defaults to 0 — confirm NULL is intended.
    unread_count INT DEFAULT 0
);
+1-1
migrations/2025-11-01-114846_moderation/up.sql
-- Enumerations used by the moderation schema.
CREATE TYPE subject_type AS ENUM (
    'account',
    'record',
    'chat'
);

CREATE TYPE label_severity AS ENUM (
    'inform',
    'alert',
    'none'
);

CREATE TYPE label_blurs AS ENUM (
    'content',
    'media',
    'none'
);
1717-CREATE TYPE label_default_setting AS ENUM ('ignore', 'warn', 'hide');
-- Default-setting values for labels; 'inform' and 'show' are accepted in
-- addition to 'ignore', 'warn' and 'hide'.
CREATE TYPE label_default_setting AS ENUM (
    'ignore',
    'warn',
    'hide',
    'inform',
    'show'
);
18181919-- =============================================================================
2020-- LABELERS
-- Drop queue tables
DROP TABLE IF EXISTS
    constellation_enrichment_queue,
    handle_resolution_queue,
    jetstream_cursors,
    backfill_jobs,
    fetch_queue;

-- Drop extensions. pgrouting goes before postgis; pg_stat_statements last.
DROP EXTENSION IF EXISTS pgrouting;

DROP EXTENSION IF EXISTS postgis;

DROP EXTENSION IF EXISTS pg_stat_statements;
···11+-- =============================================================================
22+-- QUEUES & EXTENSIONS
33+-- =============================================================================
44+--
55+-- Queue tables for async processing and PostgreSQL extensions
66+--
77+-- =============================================================================
88+99+-- =============================================================================
1010+-- POSTGRESQL EXTENSIONS
1111+-- =============================================================================

-- pg_stat_statements
--   Query performance monitoring: records execution statistics for SQL
--   statements so slow queries can be identified. It is configured in
--   postgresql.conf via shared_preload_libraries.
CREATE EXTENSION IF NOT EXISTS pg_stat_statements;

-- PostGIS
--   Geospatial extension. Parakeet does not use geospatial features
--   directly; it is installed because pgRouting requires it.
CREATE EXTENSION IF NOT EXISTS postgis;

-- pgRouting
--   Graph algorithms for social network analysis on the follows table:
--     * shortest path between users (connection discovery)
--     * community detection (strongly connected components)
--     * centrality metrics (influential users, key connectors)
--     * multi-hop neighborhood queries (N-degree connections)
CREATE EXTENSION IF NOT EXISTS pgrouting;
3131+3232+-- Note: timescaledb and pgvector are available via flake.nix but not enabled yet
3333+--
3434+-- timescaledb: Time-series optimization for notifications and stats tables
3535+-- Deferred until we design schema migration strategy (hypertables, partitioning)
3636+-- Future use: trending posts, notification retention policies, time-bucketing
3737+--
3838+-- pgvector: Vector similarity search for semantic/ML-based features
3939+-- Deferred until concrete use case emerges (embeddings, semantic search)
4040+-- Future use: content-based recommendations, duplicate detection
4141+4242+-- =============================================================================
4343+-- FETCH QUEUE
4444+-- =============================================================================

-- Postgres-backed fetch queue (replaces the Redis list-based queue).
CREATE TABLE fetch_queue (
    id           BIGSERIAL PRIMARY KEY,
    at_uri       TEXT NOT NULL,
    attempts     INT NOT NULL DEFAULT 0,
    max_attempts INT NOT NULL DEFAULT 3,
    status       TEXT NOT NULL DEFAULT 'pending',  -- pending, processing, failed
    last_error   TEXT,
    created_at   TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
    updated_at   TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
    UNIQUE (at_uri)
);

-- Dequeue support: only pending and in-flight rows are indexed.
CREATE INDEX idx_fetch_queue_status_created
    ON fetch_queue (status, created_at)
    WHERE status IN ('pending', 'processing');
6262+6363+-- =============================================================================
6464+-- BACKFILL JOBS
6565+-- =============================================================================

-- Postgres-backed backfill job tracking (replaces Redis job management).
CREATE TABLE backfill_jobs (
    did          TEXT PRIMARY KEY,
    -- One of: pending, processing, successful, failed.retry, failed.permanent
    status       TEXT NOT NULL DEFAULT 'pending',
    attempts     INT NOT NULL DEFAULT 0,
    max_attempts INT NOT NULL DEFAULT 3,
    last_error   TEXT,
    scheduled_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),  -- for exponential backoff
    started_at   TIMESTAMP WITH TIME ZONE,
    completed_at TIMESTAMP WITH TIME ZONE
);

-- Dequeue support: jobs that are waiting to run or waiting for a retry.
CREATE INDEX idx_backfill_status_scheduled
    ON backfill_jobs (status, scheduled_at)
    WHERE status IN ('pending', 'failed.retry');

-- Detection of stale jobs that are still marked as processing.
CREATE INDEX idx_backfill_processing_stale
    ON backfill_jobs (status, started_at)
    WHERE status = 'processing';
8989+9090+-- =============================================================================
9191+-- JETSTREAM CURSORS
9292+-- =============================================================================

-- Postgres-backed Jetstream cursor storage (replaces Redis cursor storage).
-- One row per partition.
CREATE TABLE jetstream_cursors (
    -- Partition name, e.g. "posts", "likes", "social"
    partition    TEXT PRIMARY KEY,
    -- Timestamp in microseconds
    cursor_value INT8 NOT NULL,
    updated_at   TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);
100100+101101+-- =============================================================================
102102+-- HANDLE RESOLUTION QUEUE
103103+-- =============================================================================

-- Work queue with at most one entry per DID.
CREATE TABLE handle_resolution_queue (
    id           BIGSERIAL PRIMARY KEY,
    did          TEXT NOT NULL,
    attempts     INT NOT NULL DEFAULT 0,
    max_attempts INT NOT NULL DEFAULT 3,
    status       TEXT NOT NULL DEFAULT 'pending',
    last_error   TEXT,
    created_at   TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at   TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    UNIQUE (did)
);

-- Dequeue support: only pending and in-flight rows are indexed.
CREATE INDEX idx_handle_resolution_status_created
    ON handle_resolution_queue (status, created_at)
    WHERE status IN ('pending', 'processing');

COMMENT ON TABLE handle_resolution_queue
    IS 'Queue for asynchronous DID handle resolution';
121121+122122+-- =============================================================================
123123+-- CONSTELLATION ENRICHMENT QUEUE
124124+-- =============================================================================

-- Work queue holding one enrichment job per DID.
CREATE TABLE constellation_enrichment_queue (
    id           BIGSERIAL PRIMARY KEY,
    did          TEXT NOT NULL UNIQUE,  -- one enrichment job per DID
    post_uris    JSONB NOT NULL,        -- array of post URIs for this actor
    attempts     INT NOT NULL DEFAULT 0,
    max_attempts INT NOT NULL DEFAULT 3,
    status       TEXT NOT NULL DEFAULT 'pending',
    last_error   TEXT,
    created_at   TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at   TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- Dequeue support: only pending and in-flight rows are indexed.
CREATE INDEX idx_constellation_status_created
    ON constellation_enrichment_queue (status, created_at)
    WHERE status IN ('pending', 'processing');

COMMENT ON TABLE constellation_enrichment_queue
    IS 'Queue for asynchronous Constellation API enrichment';
···11--- Remove indexes added for foreign key performance
22-33-DROP INDEX IF EXISTS idx_likes_via_post;
44-DROP INDEX IF EXISTS idx_reposts_via_post;
55-DROP INDEX IF EXISTS idx_profiles_pinned_post;
66-DROP INDEX IF EXISTS idx_post_facets_link_uri;
···11--- Add missing indexes on foreign key columns
22---
33--- These indexes are critical for performance when deleting posts or other referenced records.
44--- Without them, DELETE CASCADE operations require full table scans.
55---
66--- IMPORTANT: For production databases, run these with CONCURRENTLY to avoid locking:
77--- CREATE INDEX CONCURRENTLY idx_likes_via_post ON likes(via_post_id);
88--- CREATE INDEX CONCURRENTLY idx_reposts_via_post ON reposts(via_post_id);
99--- CREATE INDEX CONCURRENTLY idx_profiles_pinned_post ON profiles(pinned_post_id) WHERE pinned_post_id IS NOT NULL;
1010--- CREATE INDEX CONCURRENTLY idx_post_facets_link_uri ON post_facets(link_uri_id);
1111-1212--- CRITICAL: Improves post deletion performance
1313--- Without this, deleting a post scans all 949K+ likes rows
1414-CREATE INDEX IF NOT EXISTS idx_likes_via_post ON likes(via_post_id);
1515-1616--- CRITICAL: Improves post deletion performance
1717--- Without this, deleting a post scans all 61K+ reposts rows
1818-CREATE INDEX IF NOT EXISTS idx_reposts_via_post ON reposts(via_post_id);
1919-2020--- HIGH PRIORITY: Improves post deletion performance for pinned posts
2121--- Partial index only covers ~70 profiles with pinned posts (out of 381 total)
2222-CREATE INDEX IF NOT EXISTS idx_profiles_pinned_post
2323-ON profiles(pinned_post_id)
2424-WHERE pinned_post_id IS NOT NULL;
2525-2626--- MEDIUM PRIORITY: Improves URI deletion performance
2727--- Covers 9,451 rows with link facets
2828-CREATE INDEX IF NOT EXISTS idx_post_facets_link_uri ON post_facets(link_uri_id);
···11--- Create notifications table to replace Redis sorted sets
22-CREATE TABLE notifications (
33- id BIGSERIAL PRIMARY KEY,
44- recipient_did TEXT NOT NULL,
55- author_did TEXT NOT NULL,
66- uri TEXT NOT NULL UNIQUE, -- For deduplication (e.g., at://did/app.bsky.feed.like/rkey)
77- cid TEXT NOT NULL,
88- reason TEXT NOT NULL, -- like, repost, follow, reply, mention, quote
99- reason_subject TEXT, -- Optional subject URI (e.g., the post that was liked)
1010- indexed_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
1111-);
1212-1313--- Index for listing notifications by recipient (pagination)
1414-CREATE INDEX idx_notifications_recipient_time
1515- ON notifications(recipient_did, indexed_at DESC);
1616-1717--- Index for deletion by URI
1818-CREATE INDEX idx_notifications_uri
1919- ON notifications(uri);
2020-2121--- Index for cleanup worker (delete old notifications)
2222-CREATE INDEX idx_notifications_indexed_at
2323- ON notifications(indexed_at);
2424-2525--- Per-user notification state
2626-CREATE TABLE notification_state (
2727- did TEXT PRIMARY KEY,
2828- seen_at TIMESTAMPTZ,
2929- unread_count INTEGER DEFAULT 0
3030-);
···11--- Create fetch queue table to replace Redis list-based queue
22-CREATE TABLE fetch_queue (
33- id BIGSERIAL PRIMARY KEY,
44- at_uri TEXT NOT NULL UNIQUE,
55- attempts INTEGER NOT NULL DEFAULT 0,
66- max_attempts INTEGER NOT NULL DEFAULT 3,
77- status TEXT NOT NULL DEFAULT 'pending', -- pending, processing, failed
88- last_error TEXT,
99- created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
1010- updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
1111-);
1212-1313--- Index for queue processing (dequeue operations)
1414-CREATE INDEX idx_fetch_queue_status_created
1515- ON fetch_queue(status, created_at)
1616- WHERE status IN ('pending', 'processing');
···11--- Create backfill jobs table to replace Redis job management
22-CREATE TABLE backfill_jobs (
33- did TEXT PRIMARY KEY,
44- status TEXT NOT NULL DEFAULT 'pending',
55- -- Status values: pending, processing, successful, failed.retry, failed.permanent
66- attempts INTEGER NOT NULL DEFAULT 0,
77- max_attempts INTEGER NOT NULL DEFAULT 3,
88- last_error TEXT,
99- scheduled_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), -- For exponential backoff
1010- started_at TIMESTAMPTZ,
1111- completed_at TIMESTAMPTZ
1212-);
1313-1414--- Index for queue processing (dequeue operations)
1515-CREATE INDEX idx_backfill_status_scheduled
1616- ON backfill_jobs(status, scheduled_at)
1717- WHERE status IN ('pending', 'failed.retry');
1818-1919--- Index for detecting stale processing jobs
2020-CREATE INDEX idx_backfill_processing_stale
2121- ON backfill_jobs(status, started_at)
2222- WHERE status = 'processing';
···11-CREATE TABLE handle_resolution_queue (
22- id BIGSERIAL PRIMARY KEY,
33- did TEXT NOT NULL UNIQUE,
44- attempts INTEGER NOT NULL DEFAULT 0,
55- max_attempts INTEGER NOT NULL DEFAULT 3,
66- status TEXT NOT NULL DEFAULT 'pending',
77- last_error TEXT,
88- created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
99- updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
1010-);
1111-1212-CREATE INDEX idx_handle_resolution_status_created
1313- ON handle_resolution_queue (status, created_at)
1414- WHERE status IN ('pending', 'processing');
1515-1616-COMMENT ON TABLE handle_resolution_queue IS 'Queue for asynchronous DID handle resolution';
···11-CREATE TABLE constellation_enrichment_queue (
22- id BIGSERIAL PRIMARY KEY,
33- did TEXT NOT NULL,
44- post_uris JSONB NOT NULL, -- Array of post URIs for this actor
55- attempts INTEGER NOT NULL DEFAULT 0,
66- max_attempts INTEGER NOT NULL DEFAULT 3,
77- status TEXT NOT NULL DEFAULT 'pending',
88- last_error TEXT,
99- created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
1010- updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
1111- UNIQUE(did) -- One enrichment job per DID
1212-);
1313-1414-CREATE INDEX idx_constellation_status_created
1515- ON constellation_enrichment_queue (status, created_at)
1616- WHERE status IN ('pending', 'processing');
1717-1818-COMMENT ON TABLE constellation_enrichment_queue IS 'Queue for asynchronous Constellation API enrichment';
···11--- Drop aggregate stats tables and enums
22-DROP TABLE IF EXISTS post_aggregate_stats;
-- Drop aggregate stats tables
DROP TABLE IF EXISTS
    actor_aggregate_stats;
44-DROP TYPE IF EXISTS post_stat_type;
DROP TABLE IF EXISTS
    post_aggregate_stats;

-- Drop enums
DROP TYPE IF EXISTS
    actor_stat_type,
    post_stat_type;
···11--- PostgreSQL does not support removing enum values directly.
22--- To rollback this migration, you would need to:
33--- 1. Ensure no rows use 'inform' or 'show' values
44--- 2. Drop and recreate the enum type (requires dropping dependent columns first)
55--- 3. Recreate all tables/columns that use this type
66---
77--- Since this is impractical and the new values are required for compatibility
88--- with real production labelers, rollback is not supported.
99-1010--- If you absolutely need to rollback, manually run:
1111--- BEGIN;
1212--- -- Delete any rows using the new values
1313--- DELETE FROM labeler_defs WHERE default_setting IN ('inform', 'show');
1414--- -- Then drop and recreate the type (complex, requires recreating dependent tables)
1515--- ROLLBACK; -- Don't actually do this without a full backup!
1616-1717-SELECT 1; -- No-op to make diesel happy
···11--- Add missing enum values to label_default_setting
22--- These values are used by real labelers in production (e.g., Khronos uses 'inform', Bluesky Moderation uses 'show')
33--- even though they're not in the AT Protocol spec's "knownValues"
44-55-ALTER TYPE label_default_setting ADD VALUE IF NOT EXISTS 'inform';
66-ALTER TYPE label_default_setting ADD VALUE IF NOT EXISTS 'show';
···11--- Restore the content_version column
22--- Note: This is not truly reversible as we've lost the original version data
33--- The column will be restored but values will be NULL
44-ALTER TABLE posts ADD COLUMN content_version smallint;
···11--- Drop the content_version column from posts table
22--- This column is no longer needed as we now use a single-byte
33--- dictionary version prefix in the compressed content itself
44-ALTER TABLE posts DROP COLUMN content_version;
···11--- ============================================================================
22--- MIGRATION ROLLBACK: Revert language codes expansion and array conversion
33--- ============================================================================
44--- This rollback:
55--- 1. Drops GIN index on langs
66--- 2. Drops max-3 constraint
77--- 3. Converts langs array back to single language_code
88--- 4. Renames column back to language_code
99--- 5. Does NOT remove enum values (PostgreSQL doesn't support removing enum values)
1010---
1111--- WARNING: This will lose language data if posts have multiple languages!
1212--- Only the first language will be preserved.
1313-1414--- Step 1: Drop GIN index
1515-DROP INDEX IF EXISTS idx_posts_langs;
1616-1717--- Step 2: Drop max-3 constraint
1818-ALTER TABLE posts
1919- DROP CONSTRAINT IF EXISTS posts_langs_max_3;
2020-2121--- Step 3: Rename column back
2222-ALTER TABLE posts
2323- RENAME COLUMN langs TO language_code;
2424-2525--- Step 4: Convert array back to single enum
2626--- WARNING: Data loss! Only keeps first language from array
2727-ALTER TABLE posts
2828- ALTER COLUMN language_code TYPE language_code
2929- USING CASE
3030- WHEN array_length(language_code, 1) > 0 THEN language_code[1]
3131- ELSE NULL
3232- END;
3333-3434--- Step 5: Remove comment
3535-COMMENT ON COLUMN posts.language_code IS NULL;
3636-3737--- Note: Cannot remove enum values from PostgreSQL enum type
3838--- The following values will remain in the enum but won't be used:
3939--- id, th, vi, cs, ro, el, he, bn, ca, fa, sk, bg, hr, lt, lv, sl, et, ga, cy, is, mt, eu, ms, tl, ur, sr, hu, la
4040--- This is a PostgreSQL limitation - enum values cannot be removed once added
···11--- ============================================================================
22--- MIGRATION: Expand language codes and convert to array support
33--- ============================================================================
44--- This migration:
55--- 1. Adds 26 new language codes (id, th, vi, cs, ro, el, he, bn, ca, fa, etc.)
66--- 2. Converts posts.language_code from single enum to language_code[] array
77--- 3. Preserves existing data during conversion
88--- 4. Adds constraints for AT Protocol spec (max 3 languages)
99---
1010--- AT Protocol spec: Posts can have up to 3 language codes in langs field
1111--- Previous implementation only stored first language, this fixes data loss
1212-1313--- Step 1: Add all new language codes to the enum
1414--- Note: PostgreSQL requires adding enum values one at a time, cannot be in transaction
1515--- Based on:
1616--- - W3Techs March 2025: Top 40 internet content languages
1717--- - Ethnologue 2025: All languages with 50M+ total speakers
1818--- - Ethnologue 2025: All languages with 50M+ native speakers
1919--- Total: 38 new codes added (from 22 to 60 total)
2020-2121--- Major Asian Languages (10 codes)
2222-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'id'; -- Indonesian (252M total speakers, 1.1% web)
2323-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'th'; -- Thai (71M total speakers, 0.3% web)
2424-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'vi'; -- Vietnamese (97M total speakers, 1.1% web)
2525-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'bn'; -- Bengali (284M total speakers)
2626-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'ur'; -- Urdu (246M total speakers)
2727-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'mr'; -- Marathi (99M total speakers)
2828-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'te'; -- Telugu (96M total speakers)
2929-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'ta'; -- Tamil (86M total speakers)
3030-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'jv'; -- Javanese (69M total speakers)
3131-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'gu'; -- Gujarati (62M total speakers)
3232-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'kn'; -- Kannada (59M total speakers)
3333-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'pa'; -- Punjabi (90M total speakers)
3434-3535--- European Languages (16 codes)
3636-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'cs'; -- Czech (1.0% web)
3737-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'ro'; -- Romanian (0.5% web)
3838-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'el'; -- Greek (0.5% web)
3939-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'sk'; -- Slovak (0.4% web)
4040-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'bg'; -- Bulgarian (0.3% web)
4141-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'hr'; -- Croatian (0.2% web)
4242-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'lt'; -- Lithuanian (0.2% web)
4343-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'sl'; -- Slovenian (0.1% web)
4444-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'lv'; -- Latvian (0.1% web)
4545-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'et'; -- Estonian (0.1% web)
4646-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'sr'; -- Serbian (0.2% web)
4747-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'bs'; -- Bosnian (0.1% web)
4848-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'ca'; -- Catalan (0.1% web)
4949-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'eu'; -- Basque
5050-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'ga'; -- Irish
5151-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'cy'; -- Welsh
5252-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'is'; -- Icelandic
5353-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'mt'; -- Maltese
5454-5555--- Middle Eastern & North African (2 codes)
5656-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'fa'; -- Persian/Farsi (83M total speakers, 1.2% web)
5757-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'he'; -- Hebrew (0.4% web)
5858-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'am'; -- Amharic (60M total speakers)
5959-6060--- African Languages (2 codes)
6161-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'sw'; -- Swahili (87M total speakers)
6262-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'ha'; -- Hausa (94M total speakers)
6363-6464--- Southeast Asian (2 codes)
6565-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'tl'; -- Tagalog/Filipino (87M total speakers)
6666-ALTER TYPE language_code ADD VALUE IF NOT EXISTS 'ms'; -- Malay
6767-6868--- Step 2: Convert posts.language_code from single enum to array
6969--- This preserves existing data by converting NULL -> empty array, single value -> single-element array
7070-7171-ALTER TABLE posts
7272- ALTER COLUMN language_code TYPE language_code[]
7373- USING CASE
7474- WHEN language_code IS NOT NULL THEN ARRAY[language_code]
7575- ELSE ARRAY[]::language_code[]
7676- END;
7777-7878--- Step 3: Rename column to match AT Protocol naming (langs)
7979-ALTER TABLE posts
8080- RENAME COLUMN language_code TO langs;
8181-8282--- Step 3.5: Set NOT NULL and default (empty array means no language specified)
8383-ALTER TABLE posts
8484- ALTER COLUMN langs SET NOT NULL,
8585- ALTER COLUMN langs SET DEFAULT ARRAY[]::language_code[];
8686-8787--- Step 4: Add constraint for AT Protocol spec (max 3 languages per post)
8888-ALTER TABLE posts
8989- ADD CONSTRAINT posts_langs_max_3
9090- CHECK (array_length(langs, 1) IS NULL OR array_length(langs, 1) <= 3);
9191-9292--- Step 5: Add GIN index for efficient language searching
9393--- This allows queries like "find all posts in Japanese OR English"
9494-CREATE INDEX idx_posts_langs ON posts USING GIN (langs);
9595-9696--- Step 6: Update comment on column
9797-COMMENT ON COLUMN posts.langs IS 'Languages of post content (ISO 639-1 codes). Max 3 per AT Protocol spec. Empty array means no language specified.';
···11--- Rollback PostgreSQL extensions
22---
33--- Note: Extensions are dropped in reverse dependency order
44--- (pgrouting depends on postgis, so drop pgrouting first)
55-66--- Drop pgRouting (graph analysis for social networks)
77-DROP EXTENSION IF EXISTS pgrouting;
88-99--- Drop PostGIS (required by pgRouting, safe to drop after pgrouting)
1010-DROP EXTENSION IF EXISTS postgis;
1111-1212--- Drop pg_stat_statements (query performance monitoring)
1313--- Note: This will clear all accumulated query statistics
1414-DROP EXTENSION IF EXISTS pg_stat_statements;
···11--- Enable PostgreSQL extensions for enhanced functionality
22---
33--- pg_stat_statements: Query performance monitoring and optimization
44--- Tracks execution statistics for all SQL statements, enabling identification
5555--- of slow queries and optimization opportunities. The module must be loaded
6666--- via shared_preload_libraries in postgresql.conf before it can collect statistics.
77-CREATE EXTENSION IF NOT EXISTS pg_stat_statements;
88-99--- PostGIS: Geospatial extensions (required dependency for pgRouting)
1010--- While Parakeet doesn't use geospatial features directly, this is a
1111--- prerequisite for pgRouting's graph analysis capabilities.
1212-CREATE EXTENSION IF NOT EXISTS postgis;
1313-1414--- pgRouting: Graph analysis algorithms for social network analysis
1515--- Enables advanced graph queries on the follows table, including:
1616--- - Shortest path between users (connection discovery)
1717--- - Community detection (strongly connected components)
1818--- - Centrality metrics (influential users, key connectors)
1919--- - Multi-hop neighborhood queries (N-degree connections)
2020-CREATE EXTENSION IF NOT EXISTS pgrouting;
2121-2222--- Note: timescaledb and pgvector are available via flake.nix but not enabled yet
2323---
2424--- timescaledb: Time-series optimization for notifications and stats tables
2525--- Deferred until we design schema migration strategy (hypertables, partitioning)
2626--- Future use: trending posts, notification retention policies, time-bucketing
2727---
2828--- pgvector: Vector similarity search for semantic/ML-based features
2929--- Deferred until concrete use case emerges (embeddings, semantic search)
3030--- Future use: content-based recommendations, duplicate detection
···11--- Revert notifications table back to using DIDs
22-33--- Step 1: Add back DID columns (nullable initially)
44-ALTER TABLE notifications
55-ADD COLUMN recipient_did TEXT,
66-ADD COLUMN author_did TEXT;
77-88--- Step 2: Populate DIDs from actor_ids by joining with actors table
99-UPDATE notifications n
1010-SET
1111- recipient_did = (SELECT did FROM actors WHERE id = n.recipient_actor_id),
1212- author_did = (SELECT did FROM actors WHERE id = n.author_actor_id);
1313-1414--- Step 3: Make DID columns NOT NULL
1515-ALTER TABLE notifications
1616-ALTER COLUMN recipient_did SET NOT NULL,
1717-ALTER COLUMN author_did SET NOT NULL;
1818-1919--- Step 4: Drop foreign key constraints
2020-ALTER TABLE notifications
2121-DROP CONSTRAINT IF EXISTS fk_notifications_recipient,
2222-DROP CONSTRAINT IF EXISTS fk_notifications_author;
2323-2424--- Step 5: Drop actor_id columns
2525-ALTER TABLE notifications
2626-DROP COLUMN recipient_actor_id,
2727-DROP COLUMN author_actor_id;
2828-2929--- Step 6: Drop the actor_id indexes (if still present) and recreate the original recipient index
3030-DROP INDEX IF EXISTS idx_notifications_recipient_actor_time;
3131-DROP INDEX IF EXISTS idx_notifications_author_actor;
3232-CREATE INDEX idx_notifications_recipient_time
3333-ON notifications(recipient_did, indexed_at DESC);
···11--- Refactor notifications table to use actor_ids instead of DIDs
22--- This improves performance and consistency with other tables
33-44--- Step 1: Add new actor_id columns (nullable initially for safe migration)
55-ALTER TABLE notifications
66-ADD COLUMN recipient_actor_id INT,
77-ADD COLUMN author_actor_id INT;
88-99--- Step 2: Populate the new columns from existing DIDs
1010--- This requires joining with the actors table
1111-UPDATE notifications n
1212-SET
1313- recipient_actor_id = (SELECT id FROM actors WHERE did = n.recipient_did),
1414- author_actor_id = (SELECT id FROM actors WHERE did = n.author_did);
1515-1616--- Step 3: Make the columns NOT NULL (after populating)
1717-ALTER TABLE notifications
1818-ALTER COLUMN recipient_actor_id SET NOT NULL,
1919-ALTER COLUMN author_actor_id SET NOT NULL;
2020-2121--- Step 4: Drop old DID columns
2222-ALTER TABLE notifications
2323-DROP COLUMN recipient_did,
2424-DROP COLUMN author_did;
2525-2626--- Step 5: Add foreign key constraints
2727-ALTER TABLE notifications
2828-ADD CONSTRAINT fk_notifications_recipient
2929- FOREIGN KEY (recipient_actor_id) REFERENCES actors(id) ON DELETE CASCADE,
3030-ADD CONSTRAINT fk_notifications_author
3131- FOREIGN KEY (author_actor_id) REFERENCES actors(id) ON DELETE CASCADE;
3232-3333--- Step 6: Recreate index for recipient queries (most common use case)
3434--- The old index was on (recipient_did, indexed_at DESC)
3535--- We replace it with (recipient_actor_id, indexed_at DESC)
3636-DROP INDEX IF EXISTS idx_notifications_recipient_time;
3737-CREATE INDEX idx_notifications_recipient_actor_time
3838-ON notifications(recipient_actor_id, indexed_at DESC);
3939-4040--- Step 7: Add index for author queries (for completeness)
4141-CREATE INDEX idx_notifications_author_actor
4242-ON notifications(author_actor_id);
···11--- Drop notification enums (reverse order of creation)
22-DROP TYPE IF EXISTS notification_record_type;
33-DROP TYPE IF EXISTS notification_reason;
···11--- Create notification reason enum
22--- This enum represents why a notification was created
33-CREATE TYPE notification_reason AS ENUM (
44- 'like', -- User liked a post
55- 'repost', -- User reposted a post
66- 'follow', -- User followed someone
77- 'reply', -- User replied to a post
88- 'mention', -- User mentioned someone in a post
99- 'quote' -- User quoted a post
1010-);
1111-1212--- Create record type enum for notification records
1313--- This enum represents the type of record that triggered the notification
1414-CREATE TYPE notification_record_type AS ENUM (
1515- 'like', -- app.bsky.feed.like
1616- 'repost', -- app.bsky.feed.repost
1717- 'follow', -- app.bsky.graph.follow
1818- 'post' -- app.bsky.feed.post (used for reply/mention/quote)
1919-);
···11--- Drop normalized notification columns (reverse order)
22-33-DROP INDEX IF EXISTS idx_notifications_subject_actor;
44-55-ALTER TABLE notifications
66-DROP COLUMN IF EXISTS subject_rkey,
77-DROP COLUMN IF EXISTS subject_record_type,
88-DROP COLUMN IF EXISTS subject_actor_id,
99-DROP COLUMN IF EXISTS record_cid_new,
1010-DROP COLUMN IF EXISTS record_rkey,
1111-DROP COLUMN IF EXISTS record_type,
1212-DROP COLUMN IF EXISTS reason_enum;
···11--- Add new normalized columns to notifications table
22--- These columns will replace uri and reason_subject with structured data
33-44--- Add reason enum column (will replace TEXT reason column)
55-ALTER TABLE notifications
66-ADD COLUMN reason_enum notification_reason;
77-88--- Add record fields (will replace uri: at://did/collection/rkey)
99-ALTER TABLE notifications
1010-ADD COLUMN record_type notification_record_type,
1111-ADD COLUMN record_rkey TEXT,
1212-ADD COLUMN record_cid_new TEXT; -- Temporary name to avoid conflict with existing cid
1313-1414--- Add subject fields (will replace reason_subject: at://did/collection/rkey)
1515-ALTER TABLE notifications
1616-ADD COLUMN subject_actor_id INT REFERENCES actors(id) ON DELETE CASCADE,
1717-ADD COLUMN subject_record_type notification_record_type,
1818-ADD COLUMN subject_rkey TEXT;
1919-2020--- Create index on subject_actor_id for efficient lookups
2121-CREATE INDEX idx_notifications_subject_actor ON notifications(subject_actor_id);
2222-2323--- Note: Columns are nullable during migration. They will be populated in the next migration
2424--- and then made NOT NULL (where appropriate) in the finalization migration.
···11--- Reverse the population by setting all normalized columns back to NULL
22--- This allows us to re-run the population logic if needed
33-44-UPDATE notifications
55-SET
66- reason_enum = NULL,
77- record_type = NULL,
88- record_rkey = NULL,
99- record_cid_new = NULL,
1010- subject_actor_id = NULL,
1111- subject_record_type = NULL,
1212- subject_rkey = NULL;
···11--- Populate normalized notification columns from existing URI data
22---
33--- This migration parses AT-URIs and populates the new structured columns.
44--- AT-URI format: at://did:plc:xxx/collection/rkey
55-66--- Step 1: Populate reason_enum from existing TEXT reason column
77-UPDATE notifications
88-SET reason_enum = reason::notification_reason;
99-1010--- Step 2: Populate record fields from uri column
1111--- Extract rkey (last path component after final /)
1212-UPDATE notifications
1313-SET record_rkey = substring(uri from '[^/]+$');
1414-1515--- Copy CID to new column
1616-UPDATE notifications
1717-SET record_cid_new = cid;
1818-1919--- Determine record_type from collection in URI
2020-UPDATE notifications
2121-SET record_type = CASE
2222- WHEN uri LIKE '%/app.bsky.feed.like/%' THEN 'like'::notification_record_type
2323- WHEN uri LIKE '%/app.bsky.feed.repost/%' THEN 'repost'::notification_record_type
2424- WHEN uri LIKE '%/app.bsky.graph.follow/%' THEN 'follow'::notification_record_type
2525- WHEN uri LIKE '%/app.bsky.feed.post/%' THEN 'post'::notification_record_type
2626- ELSE NULL
2727-END;
2828-2929--- Step 3: Populate subject fields from reason_subject column
3030--- Extract rkey from reason_subject
3131-UPDATE notifications
3232-SET subject_rkey = substring(reason_subject from '[^/]+$')
3333-WHERE reason_subject IS NOT NULL;
3434-3535--- Determine subject_record_type from collection in reason_subject
3636-UPDATE notifications
3737-SET subject_record_type = CASE
3838- WHEN reason_subject LIKE '%/app.bsky.feed.post/%' THEN 'post'::notification_record_type
3939- WHEN reason_subject LIKE '%/app.bsky.feed.like/%' THEN 'like'::notification_record_type
4040- WHEN reason_subject LIKE '%/app.bsky.feed.repost/%' THEN 'repost'::notification_record_type
4141- ELSE NULL
4242-END
4343-WHERE reason_subject IS NOT NULL;
4444-4545--- Resolve subject_actor_id from DID in reason_subject
4646--- Extract DID: at://did:plc:xxx/...
4747-UPDATE notifications n
4848-SET subject_actor_id = (
4949- SELECT a.id
5050- FROM actors a
5151- WHERE a.did = substring(n.reason_subject from 'at://([^/]+)')
5252-)
5353-WHERE n.reason_subject IS NOT NULL;
5454-5555--- Verification queries (run these to check data integrity):
5656--- SELECT COUNT(*) FROM notifications WHERE reason_enum IS NULL; -- Should be 0
5757--- SELECT COUNT(*) FROM notifications WHERE record_type IS NULL; -- Should be 0
5858--- SELECT COUNT(*) FROM notifications WHERE record_rkey IS NULL; -- Should be 0
5959--- SELECT COUNT(*) FROM notifications WHERE reason_subject IS NOT NULL AND subject_actor_id IS NULL; -- Orphaned subjects
···11--- Reverse finalization by restoring old columns
22--- WARNING: This will lose data since we can't perfectly reconstruct URIs
33--- This is mainly for development/testing rollback scenarios
44-55--- Step 1: Drop unique constraint
66-ALTER TABLE notifications
77-DROP CONSTRAINT IF EXISTS notifications_unique_record;
88-99--- Step 2: Rename columns back to temporary names
1010-ALTER TABLE notifications
1111-RENAME COLUMN reason TO reason_enum;
1212-1313-ALTER TABLE notifications
1414-RENAME COLUMN record_cid TO record_cid_new;
1515-1616--- Step 3: Add back old columns (nullable for now)
1717-ALTER TABLE notifications
1818-ADD COLUMN uri TEXT,
1919-ADD COLUMN cid TEXT,
2020-ADD COLUMN reason TEXT,
2121-ADD COLUMN reason_subject TEXT;
2222-2323--- Step 4: Restore cid and reason from the renamed columns
2424--- NOTE: URIs cannot be rebuilt from this table alone because it no longer stores DIDs
2525--- In a real rollback scenario, you'd need to join with actors table
2626-UPDATE notifications
2727-SET
2828- cid = record_cid_new,
2929- reason = reason_enum::TEXT;
3030-3131--- NOTE: uri and reason_subject cannot be fully reconstructed without DIDs
3232--- These would need to be populated from actors table joins
3333--- For now, mark them as incomplete
3434-UPDATE notifications
3535-SET uri = 'INCOMPLETE:' || record_type::TEXT || '/' || record_rkey;
3636-3737--- Step 5: Make old columns NOT NULL (if they were before)
3838--- Skipped because we can't fully reconstruct the data
3939-4040--- Step 6: Recreate uri index
4141-CREATE UNIQUE INDEX notifications_uri_key ON notifications(uri);
···11--- Finalize notification normalization
22--- Make new columns NOT NULL, drop old columns, add constraints
33-44--- Step 1: Make required columns NOT NULL
55-ALTER TABLE notifications
66-ALTER COLUMN reason_enum SET NOT NULL,
77-ALTER COLUMN record_type SET NOT NULL,
88-ALTER COLUMN record_rkey SET NOT NULL,
99-ALTER COLUMN record_cid_new SET NOT NULL;
1010-1111--- Step 2: Drop old columns that have been replaced
1212-ALTER TABLE notifications
1313-DROP COLUMN uri,
1414-DROP COLUMN cid,
1515-DROP COLUMN reason,
1616-DROP COLUMN reason_subject;
1717-1818--- Step 3: Rename record_cid_new to record_cid (now that old cid is gone)
1919-ALTER TABLE notifications
2020-RENAME COLUMN record_cid_new TO record_cid;
2121-2222--- Step 4: Rename reason_enum to reason (now that old reason is gone)
2323-ALTER TABLE notifications
2424-RENAME COLUMN reason_enum TO reason;
2525-2626--- Step 5: Add unique constraint on (author_actor_id, record_type, record_rkey)
2727--- This replaces the old uri uniqueness constraint
2828-ALTER TABLE notifications
2929-ADD CONSTRAINT notifications_unique_record
3030-UNIQUE (author_actor_id, record_type, record_rkey);
3131-3232--- Step 6: Drop old uri index (no longer needed)
3333-DROP INDEX IF EXISTS idx_notifications_uri;
3434-3535--- Final schema:
3636--- notifications (
3737--- id BIGSERIAL PRIMARY KEY,
3838--- recipient_actor_id INT NOT NULL,
3939--- author_actor_id INT NOT NULL,
4040--- indexed_at TIMESTAMPTZ NOT NULL,
4141--- reason notification_reason NOT NULL,
4242--- record_type notification_record_type NOT NULL,
4343--- record_rkey TEXT NOT NULL,
4444--- record_cid TEXT NOT NULL,
4545--- subject_actor_id INT (nullable),
4646--- subject_record_type notification_record_type (nullable),
4747--- subject_rkey TEXT (nullable),
4848--- UNIQUE (author_actor_id, record_type, record_rkey)
4949--- )
···11--- Normalize notification_state table to use actor_id instead of DID
22--- This migration changes the primary key from DID (text) to actor_id (integer)
33-44--- Step 1: Add actor_id column
55-ALTER TABLE notification_state
66-ADD COLUMN actor_id INT REFERENCES actors(id) ON DELETE CASCADE;
77-88--- Step 2: Populate actor_id from DIDs
99-UPDATE notification_state ns
1010-SET actor_id = a.id
1111-FROM actors a
1212-WHERE ns.did = a.did;
1313-1414--- Step 3: Make actor_id NOT NULL (will fail if any DIDs couldn't be resolved)
1515-ALTER TABLE notification_state
1616-ALTER COLUMN actor_id SET NOT NULL;
1717-1818--- Step 4: Drop old primary key and DID column
1919-ALTER TABLE notification_state
2020-DROP CONSTRAINT notification_state_pkey,
2121-DROP COLUMN did;
2222-2323--- Step 5: Add new primary key on actor_id
2424-ALTER TABLE notification_state
2525-ADD PRIMARY KEY (actor_id);
···11--- Optimize notification storage to use INT8 (TID rkeys) and BYTEA (CID digests)
22--- This matches the pattern used by posts, likes, reposts, and follows tables
33--- Storage savings: TID TEXT→INT8 (38% smaller), CID TEXT→BYTEA (46% smaller)
44-55--- WARNING: This migration truncates the notifications table
66--- Notifications will be regenerated naturally as new activity occurs
77-88--- Step 1: Truncate existing notifications (they'll be regenerated)
99-TRUNCATE TABLE notifications;
1010-1111--- Step 2: Drop old columns
1212-ALTER TABLE notifications
1313-DROP COLUMN record_rkey,
1414-DROP COLUMN record_cid,
1515-DROP COLUMN subject_rkey;
1616-1717--- Step 3: Add new optimized columns
1818-ALTER TABLE notifications
1919-ADD COLUMN record_rkey INT8 NOT NULL,
2020-ADD COLUMN record_cid BYTEA NOT NULL,
2121-ADD COLUMN subject_rkey INT8;
2222-2323--- Step 4: Recreate unique constraint with new column types
2424-ALTER TABLE notifications
2525-DROP CONSTRAINT IF EXISTS notifications_unique_record;
2626-ALTER TABLE notifications
2727-ADD CONSTRAINT notifications_unique_record
2828-UNIQUE (author_actor_id, record_type, record_rkey);
2929-3030--- Add documentation comments
3131-COMMENT ON COLUMN notifications.record_rkey IS 'TID as INT8 (convert to string via i64_to_tid())';
3232-COMMENT ON COLUMN notifications.record_cid IS '32-byte CID digest (header stripped, reconstruct via digest_to_record_cid())';
3333-COMMENT ON COLUMN notifications.subject_rkey IS 'Optional TID as INT8 for subject record';
3434-3535--- Final schema:
3636--- notifications (
3737--- id BIGSERIAL PRIMARY KEY,
3838--- recipient_actor_id INT NOT NULL REFERENCES actors(id),
3939--- author_actor_id INT NOT NULL REFERENCES actors(id),
4040--- indexed_at TIMESTAMPTZ NOT NULL,
4141--- reason notification_reason NOT NULL,
4242--- record_type notification_record_type NOT NULL,
4343--- record_rkey INT8 NOT NULL, -- ✅ OPTIMIZED: was TEXT
4444--- record_cid BYTEA NOT NULL, -- ✅ OPTIMIZED: was TEXT
4545--- subject_actor_id INT REFERENCES actors(id),
4646--- subject_record_type notification_record_type,
4747--- subject_rkey INT8, -- ✅ OPTIMIZED: was TEXT
4848--- UNIQUE (author_actor_id, record_type, record_rkey)
4949--- )