Rust AppView - highly experimental!

feat: denormalize strategy; labels to arrays

Changed files
+134 -35
migrations
2025-12-08-033410_labels_to_arrays
parakeet
src
loaders
parakeet-db
+56
migrations/2025-12-08-033410_labels_to_arrays/down.sql
-- Rollback Phase 7: Restore labels table from arrays
--
-- Rebuilds the normalized `labels` table from the `labels` arrays stored on
-- `posts` and `actors`, then removes the array columns and their GIN indexes.
--
-- Not recoverable by this rollback (never copied into the composite types):
--   * self_label -> restored as false for every row
--   * cid, sig   -> restored as NULL
-- Labels whose subject was neither a post nor an actor profile record are not
-- recreated either, because they have no array column to be read back from.

-- Recreate labels table
CREATE TABLE labels (
    labeler_actor_id INTEGER NOT NULL,
    label TEXT NOT NULL,
    uri TEXT NOT NULL,
    self_label BOOLEAN NOT NULL DEFAULT false,
    cid BYTEA,
    negated BOOLEAN NOT NULL DEFAULT false,
    expires TIMESTAMP WITH TIME ZONE,
    sig BYTEA,
    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
    PRIMARY KEY (labeler_actor_id, label, uri)
);

-- Recreate indexes
CREATE INDEX idx_labels_uri ON labels(uri);
CREATE INDEX idx_labels_expires ON labels(expires) WHERE expires IS NOT NULL;

-- Backfill post labels
--
-- Array elements and composite fields are nullable, and nothing on the array
-- side enforces the (labeler_actor_id, label, uri) key, so the rows are
-- sanitized before insertion:
--   * rows with a NULL key part are skipped (this also skips NULL elements);
--     the check is per field because `composite IS NOT NULL` would be false
--     for any label whose `expires` is NULL
--   * negated / created_at fall back to the column defaults when NULL
--   * when one subject carries the same (labeler, label) twice, only the most
--     recently created entry is kept
INSERT INTO labels (labeler_actor_id, label, uri, self_label, negated, expires, created_at)
SELECT DISTINCT ON (src.labeler_actor_id, src.label, src.uri)
    src.labeler_actor_id,
    src.label,
    src.uri,
    false,  -- self_label - not stored in composite type
    COALESCE(src.negated, false),
    src.expires,
    COALESCE(src.created_at, now())
FROM (
    SELECT
        (label_data).labeler_actor_id AS labeler_actor_id,
        (label_data).label AS label,
        'at://' || a.did || '/app.bsky.feed.post/' || i64_to_tid(p.rkey) AS uri,
        (label_data).negated AS negated,
        (label_data).expires AS expires,
        (label_data).created_at AS created_at
    FROM posts p
    INNER JOIN actors a ON p.actor_id = a.id
    CROSS JOIN LATERAL UNNEST(p.labels) AS label_data
    WHERE p.labels IS NOT NULL
) AS src
WHERE src.labeler_actor_id IS NOT NULL
  AND src.label IS NOT NULL
  AND src.uri IS NOT NULL
ORDER BY src.labeler_actor_id, src.label, src.uri, src.created_at DESC NULLS LAST;

-- Backfill actor labels (same sanitizing rules as the post backfill above)
INSERT INTO labels (labeler_actor_id, label, uri, self_label, negated, expires, created_at)
SELECT DISTINCT ON (src.labeler_actor_id, src.label, src.uri)
    src.labeler_actor_id,
    src.label,
    src.uri,
    false,  -- self_label - not stored in composite type
    COALESCE(src.negated, false),
    src.expires,
    COALESCE(src.created_at, now())
FROM (
    SELECT
        (label_data).labeler_actor_id AS labeler_actor_id,
        (label_data).label AS label,
        'at://' || a.did || '/app.bsky.actor.profile/self' AS uri,
        (label_data).negated AS negated,
        (label_data).expires AS expires,
        (label_data).created_at AS created_at
    FROM actors a
    CROSS JOIN LATERAL UNNEST(a.labels) AS label_data
    WHERE a.labels IS NOT NULL
) AS src
WHERE src.labeler_actor_id IS NOT NULL
  AND src.label IS NOT NULL
  AND src.uri IS NOT NULL
ORDER BY src.labeler_actor_id, src.label, src.uri, src.created_at DESC NULLS LAST;

-- Drop GIN indexes
-- IF EXISTS keeps the rollback usable when an index was already removed by hand.
DROP INDEX IF EXISTS idx_posts_labels_gin;
DROP INDEX IF EXISTS idx_actors_labels_gin;

-- Drop label columns
ALTER TABLE posts DROP COLUMN labels;
ALTER TABLE actors DROP COLUMN labels;
+56
migrations/2025-12-08-033410_labels_to_arrays/up.sql
-- Phase 7: Denormalize Labels into Arrays
--
-- This migration:
-- 1. Adds labels arrays to posts and actors tables
-- 2. Backfills labels from the labels table
-- 3. Drops the labels table
--
-- Labels are stored as composite types (post_label, actor_label) in arrays.
-- The ROW(...)::type casts below are positional; the field order used here is
-- (labeler_actor_id, label, created_at, negated, expires).
-- NOTE(review): confirm that order matches the post_label / actor_label type
-- definitions, which are created in an earlier migration.
--
-- This migration is lossy:
--   * self_label, cid and sig are not carried into the composite types
--   * label rows whose uri is neither a post URI nor a profile record URI are
--     not copied anywhere and disappear with the DROP TABLE in step 5
-- NOTE(review): if account-level labels are stored with a bare DID as `uri`
-- (rather than the profile record URI), they are dropped too - verify before
-- running against production data.

-- Step 1: Add labels columns
ALTER TABLE posts ADD COLUMN labels post_label[];
ALTER TABLE actors ADD COLUMN labels actor_label[];

-- Step 2: Backfill posts labels
-- Join labels with posts/actors on the reconstructed AT URI, aggregate once per
-- post, and update only the posts that actually have labels. Unlabelled posts
-- keep the NULL the new column already holds, so they are not rewritten.
UPDATE posts p
SET labels = agg.labels
FROM (
    SELECT
        src.actor_id,
        src.rkey,
        ARRAY_AGG(
            ROW(
                l.labeler_actor_id,
                l.label,
                l.created_at,
                l.negated,
                l.expires
            )::post_label
            -- tie-breakers make the element order deterministic
            ORDER BY l.created_at, l.labeler_actor_id, l.label
        ) AS labels
    FROM posts src
    INNER JOIN actors a ON a.id = src.actor_id
    INNER JOIN labels l
        ON l.uri = 'at://' || a.did || '/app.bsky.feed.post/' || i64_to_tid(src.rkey)
    GROUP BY src.actor_id, src.rkey
) AS agg
WHERE p.actor_id = agg.actor_id
  AND p.rkey = agg.rkey;

-- Step 3: Backfill actors labels
-- Note: Actor labels use the profile record URI: at://{did}/app.bsky.actor.profile/self
-- Same shape as step 2: only actors with at least one matching label are updated.
UPDATE actors a
SET labels = agg.labels
FROM (
    SELECT
        src.id,
        ARRAY_AGG(
            ROW(
                l.labeler_actor_id,
                l.label,
                l.created_at,
                l.negated,
                l.expires
            )::actor_label
            ORDER BY l.created_at, l.labeler_actor_id, l.label
        ) AS labels
    FROM actors src
    INNER JOIN labels l
        ON l.uri = 'at://' || src.did || '/app.bsky.actor.profile/self'
    GROUP BY src.id
) AS agg
WHERE a.id = agg.id;

-- Step 4: Create GIN indexes for fast array searches
CREATE INDEX idx_posts_labels_gin ON posts USING GIN (labels);
CREATE INDEX idx_actors_labels_gin ON actors USING GIN (labels);

-- Step 5: Drop old labels table
DROP TABLE labels;
+2 -20
parakeet-db/src/composite_types.rs
··· 28 28 LabelerDefRecord, FollowRecord, 29 29 MuteRecord, BlockRecord, BookmarkRecord, // Phase 4: User preferences (diesel-generated) 30 30 ThreadMuteRecord, ListMuteRecord, ListBlockRecord, // Phase 5: List moderation (diesel-generated) 31 + PostLabel, ActorLabel, // Phase 7: Label composite types (diesel-generated) 31 32 }; 32 - 33 - // Placeholder SQL types for composite types not yet used in tables 34 - // NOTE: Once columns are added with these composite types, diesel will auto-generate the SQL types 35 - #[allow(dead_code)] 36 - mod placeholder_sql_types { 37 - use diesel::query_builder::QueryId; 38 - use diesel::sql_types::SqlType; 39 - 40 - // Phase 7: Label composite types (will be added later) 41 - #[derive(QueryId, SqlType)] 42 - #[diesel(postgres_type(name = "post_label"))] 43 - pub struct PostLabel; 44 - 45 - #[derive(QueryId, SqlType)] 46 - #[diesel(postgres_type(name = "actor_label"))] 47 - pub struct ActorLabel; 48 - } 49 - 50 - use placeholder_sql_types::*; 51 33 use crate::types::{ 52 34 ImageMimeType, VideoMimeType, FacetType, CaptionMimeType, LanguageCode, 53 - LabelSeverity, LabelBlurs, LabelDefaultSetting, 35 + LabelSeverity, LabelBlurs, LabelDefaultSetting, // For labeler_def_record 54 36 }; 55 37 use chrono::{DateTime, Utc}; 56 38 use diesel::deserialize::{self, FromSql};
+4
parakeet-db/src/models.rs
··· 176 176 // Phase 6: RKey arrays for quick lookups 177 177 pub post_rkeys: Option<Vec<Option<i64>>>, // All post rkeys owned by this actor 178 178 pub repost_rkeys: Option<Vec<Option<i64>>>, // All repost rkeys owned by this actor 179 + // Phase 7: Labels array (from labels table, denormalized) 180 + pub labels: Option<Vec<Option<crate::composite_types::ActorLabelRecord>>>, // Labels applied to this actor 179 181 } 180 182 181 183 // AllowlistEntry model removed - allowlist table dropped in favor of actors.sync_state ··· 254 256 pub postgate_rules: Option<array_helpers::PostgateRuleArray>, // Rules (maxLength: 5) 255 257 pub postgate_detached_actor_ids: Option<Vec<Option<i32>>>, // Detached embed authors (maxLength: 50) 256 258 pub postgate_detached_rkeys: Option<Vec<Option<i64>>>, // Detached embed rkeys (parallel array) 259 + // Phase 7: Labels array (from labels table, denormalized) 260 + pub labels: Option<Vec<Option<crate::composite_types::PostLabelRecord>>>, // Labels applied to this post 257 261 // Note: created_at derived from TID rkey via created_at() method 258 262 } 259 263
+12 -15
parakeet-db/src/schema.rs
··· 2 2 3 3 pub mod sql_types { 4 4 #[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)] 5 + #[diesel(postgres_type(name = "actor_label"))] 6 + pub struct ActorLabel; 7 + 8 + #[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)] 5 9 #[diesel(postgres_type(name = "actor_status"))] 6 10 pub struct ActorStatus; 7 11 ··· 114 118 pub struct PostImageEmbed; 115 119 116 120 #[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)] 121 + #[diesel(postgres_type(name = "post_label"))] 122 + pub struct PostLabel; 123 + 124 + #[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)] 117 125 #[diesel(postgres_type(name = "post_stat_type"))] 118 126 pub struct PostStatType; 119 127 ··· 206 214 use super::sql_types::ThreadMuteRecord; 207 215 use super::sql_types::ListMuteRecord; 208 216 use super::sql_types::ListBlockRecord; 217 + use super::sql_types::ActorLabel; 209 218 210 219 actors (id) { 211 220 id -> Int4, ··· 266 275 list_blocks -> Nullable<Array<Nullable<ListBlockRecord>>>, 267 276 post_rkeys -> Nullable<Array<Nullable<Int8>>>, 268 277 repost_rkeys -> Nullable<Array<Nullable<Int8>>>, 278 + labels -> Nullable<Array<Nullable<ActorLabel>>>, 269 279 } 270 280 } 271 281 ··· 362 372 } 363 373 364 374 diesel::table! { 365 - labels (labeler_actor_id, label, uri) { 366 - labeler_actor_id -> Int4, 367 - label -> Text, 368 - uri -> Text, 369 - self_label -> Bool, 370 - cid -> Nullable<Bytea>, 371 - negated -> Bool, 372 - expires -> Nullable<Timestamptz>, 373 - sig -> Nullable<Bytea>, 374 - created_at -> Timestamptz, 375 - } 376 - } 377 - 378 - diesel::table! 
{ 379 375 list_items (actor_id, rkey) { 380 376 actor_id -> Int4, 381 377 rkey -> Int8, ··· 449 445 use super::sql_types::PostFacetEmbed; 450 446 use super::sql_types::ThreadgateRule; 451 447 use super::sql_types::PostgateRule; 448 + use super::sql_types::PostLabel; 452 449 453 450 posts (actor_id, rkey) { 454 451 actor_id -> Int4, ··· 499 496 postgate_rules -> Nullable<Array<Nullable<PostgateRule>>>, 500 497 postgate_detached_actor_ids -> Nullable<Array<Nullable<Int4>>>, 501 498 postgate_detached_rkeys -> Nullable<Array<Nullable<Int8>>>, 499 + labels -> Nullable<Array<Nullable<PostLabel>>>, 502 500 } 503 501 } 504 502 ··· 591 589 fetch_queue, 592 590 handle_resolution_queue, 593 591 jetstream_cursors, 594 - labels, 595 592 list_items, 596 593 lists, 597 594 notifications,
+2
parakeet-db/src/types.rs
··· 453 453 Chat = "chat", 454 454 }); 455 455 456 + // Label configuration enums (used in labeler_def_record composite type) 457 + // Note: These are different from label instance data (post_label, actor_label) 456 458 diesel_enum!(LabelSeverity, LabelSeverity, { 457 459 Inform = "inform", 458 460 Alert = "alert",
+2
parakeet/src/loaders/post.rs
··· 936 936 postgate_rules: None, 937 937 postgate_detached_actor_ids: None, 938 938 postgate_detached_rkeys: None, 939 + // Phase 7: Labels (not loaded for hydration - will be loaded separately if needed) 940 + labels: None, 939 941 }; 940 942 941 943 // Encode TIDs using Rust utility functions