+56
migrations/2025-12-08-033410_labels_to_arrays/down.sql
+56
migrations/2025-12-08-033410_labels_to_arrays/down.sql
···
1
+
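-- Rollback Phase 7: Restore labels table from arrays
-- NOTE: this rollback is lossy. The arrays do not carry self_label, cid or sig,
-- so self_label is restored as false and cid/sig as NULL. Only labels on posts
-- and on actor profile records can be rebuilt; up.sql did not copy any other
-- labels.uri shape into the arrays.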
2
+
3
+
-- Recreate labels table
4
+
-- Rebuild the normalized labels table that up.sql dropped.
-- A row is identified by (labeler_actor_id, label, uri).
CREATE TABLE labels (
    labeler_actor_id INT4        NOT NULL,
    label            TEXT        NOT NULL,
    uri              TEXT        NOT NULL,
    self_label       BOOL        NOT NULL DEFAULT FALSE,
    cid              BYTEA,
    negated          BOOL        NOT NULL DEFAULT FALSE,
    expires          TIMESTAMPTZ,
    sig              BYTEA,
    created_at       TIMESTAMPTZ NOT NULL DEFAULT now(),
    PRIMARY KEY (labeler_actor_id, label, uri)
);

-- Secondary indexes: one on uri, and a partial index covering only rows that
-- have an expiry set.
CREATE INDEX idx_labels_uri
    ON labels (uri);
CREATE INDEX idx_labels_expires
    ON labels (expires)
    WHERE expires IS NOT NULL;
20
+
21
+
-- Backfill post labels
22
+
-- Rebuild one labels row per element of posts.labels.
-- The subject URI is reconstructed from the owning actor's DID and the post rkey.
INSERT INTO labels (labeler_actor_id, label, uri, self_label, negated, expires, created_at)
SELECT
    (label_data).labeler_actor_id,
    (label_data).label,
    'at://' || a.did || '/app.bsky.feed.post/' || i64_to_tid(p.rkey),
    false,  -- self_label is not stored in the composite type
    (label_data).negated,
    (label_data).expires,
    (label_data).created_at
FROM posts p
INNER JOIN actors a ON p.actor_id = a.id
CROSS JOIN LATERAL UNNEST(p.labels) AS label_data
WHERE p.labels IS NOT NULL
  -- A NULL array element unnests to an all-NULL row, which would violate the
  -- NOT NULL key columns and abort the whole rollback. Test the key fields
  -- directly: "label_data IS NOT NULL" is false for a composite whenever ANY
  -- field (e.g. expires) is NULL, so it would also drop valid labels.
  AND (label_data).labeler_actor_id IS NOT NULL
  AND (label_data).label IS NOT NULL;
35
+
36
+
-- Backfill actor labels
37
+
-- Rebuild one labels row per element of actors.labels.
-- Actor labels are keyed by the actor's profile record URI.
INSERT INTO labels (labeler_actor_id, label, uri, self_label, negated, expires, created_at)
SELECT
    (label_data).labeler_actor_id,
    (label_data).label,
    'at://' || a.did || '/app.bsky.actor.profile/self',
    false,  -- self_label is not stored in the composite type
    (label_data).negated,
    (label_data).expires,
    (label_data).created_at
FROM actors a
CROSS JOIN LATERAL UNNEST(a.labels) AS label_data
WHERE a.labels IS NOT NULL
  -- A NULL array element unnests to an all-NULL row, which would violate the
  -- NOT NULL key columns and abort the whole rollback. Test the key fields
  -- directly: "label_data IS NOT NULL" is false for a composite whenever ANY
  -- field (e.g. expires) is NULL, so it would also drop valid labels.
  AND (label_data).labeler_actor_id IS NOT NULL
  AND (label_data).label IS NOT NULL;
49
+
50
+
-- Drop GIN indexes
51
+
-- Remove both GIN indexes that up.sql created on the array columns.
DROP INDEX idx_posts_labels_gin, idx_actors_labels_gin;

-- Remove the denormalized label arrays themselves.
ALTER TABLE posts
    DROP COLUMN labels;
ALTER TABLE actors
    DROP COLUMN labels;
+56
migrations/2025-12-08-033410_labels_to_arrays/up.sql
+56
migrations/2025-12-08-033410_labels_to_arrays/up.sql
···
1
+
-- Phase 7: Denormalize Labels into Arrays
2
+
--
3
+
-- This migration:
4
+
-- 1. Adds labels arrays to posts and actors tables
5
+
-- 2. Backfills labels from the labels table
6
+
-- 3. Drops the labels table
7
+
--
8
+
-- Labels are stored as composite types (post_label, actor_label) in arrays
9
+
10
+
-- Step 1: Add labels columns
11
+
-- New array columns, declared without NOT NULL or DEFAULT, so every existing
-- row starts out with labels = NULL.
ALTER TABLE posts
    ADD COLUMN labels post_label[];
ALTER TABLE actors
    ADD COLUMN labels actor_label[];
13
+
14
+
-- Step 2: Backfill posts labels
15
+
-- Join labels table with actors to get DIDs, then construct AT URIs for posts
16
+
-- Backfill posts.labels from the labels table.
--
-- Each post's AT URI is rebuilt from the owning actor's DID and the post rkey
-- and matched against labels.uri. The aggregation is done once in a derived
-- table and joined back on the posts key (actor_id, rkey), so only posts that
-- actually have label rows are updated. A correlated subquery in SET with no
-- WHERE clause would instead rewrite every row in posts just to store NULL in
-- a column that is already NULL.
UPDATE posts p
SET labels = src.labels
FROM (
    SELECT
        lp.actor_id,
        lp.rkey,
        ARRAY_AGG(
            ROW(
                l.labeler_actor_id,
                l.label,
                l.created_at,
                l.negated,
                l.expires
            )::post_label
            ORDER BY l.created_at
        ) AS labels
    FROM posts lp
    INNER JOIN actors a ON lp.actor_id = a.id
    INNER JOIN labels l
        ON l.uri = 'at://' || a.did || '/app.bsky.feed.post/' || i64_to_tid(lp.rkey)
    GROUP BY lp.actor_id, lp.rkey
) AS src
WHERE p.actor_id = src.actor_id
  AND p.rkey = src.rkey;
32
+
33
+
-- Step 3: Backfill actors labels
34
+
-- Note: Actor labels use the profile record URI: at://{did}/app.bsky.actor.profile/self
35
+
-- Backfill actors.labels from the labels table.
--
-- Actor labels are matched on the profile record URI
-- at://{did}/app.bsky.actor.profile/self. Label rows are aggregated per uri
-- once and joined to actors, so only actors that actually have label rows are
-- updated; a correlated subquery in SET with no WHERE clause would rewrite
-- every row in actors.
UPDATE actors a
SET labels = src.labels
FROM (
    SELECT
        l.uri,
        ARRAY_AGG(
            ROW(
                l.labeler_actor_id,
                l.label,
                l.created_at,
                l.negated,
                l.expires
            )::actor_label
            ORDER BY l.created_at
        ) AS labels
    FROM labels l
    -- Cheap pre-filter: every URI the join below can match ends with this suffix.
    WHERE l.uri LIKE '%/app.bsky.actor.profile/self'
    GROUP BY l.uri
) AS src
WHERE src.uri = 'at://' || a.did || '/app.bsky.actor.profile/self';
50
+
51
+
-- Step 4: Create GIN indexes for fast array searches
52
+
-- GIN indexes over the new array columns.
CREATE INDEX idx_posts_labels_gin
    ON posts USING gin (labels);
CREATE INDEX idx_actors_labels_gin
    ON actors USING gin (labels);

-- Step 5: Drop old labels table
-- NOTE(review): the backfills above copy only labeler_actor_id, label,
-- created_at, negated and expires, and only for post and actor-profile URIs.
-- self_label, cid, sig and rows with any other uri shape are discarded by this
-- drop — confirm that is intended.
DROP TABLE labels;
+2
-20
parakeet-db/src/composite_types.rs
+2
-20
parakeet-db/src/composite_types.rs
···
28
28
LabelerDefRecord, FollowRecord,
29
29
MuteRecord, BlockRecord, BookmarkRecord, // Phase 4: User preferences (diesel-generated)
30
30
ThreadMuteRecord, ListMuteRecord, ListBlockRecord, // Phase 5: List moderation (diesel-generated)
31
+
PostLabel, ActorLabel, // Phase 7: Label composite types (diesel-generated)
31
32
};
32
-
33
-
// Placeholder SQL types for composite types not yet used in tables
34
-
// NOTE: Once columns are added with these composite types, diesel will auto-generate the SQL types
35
-
#[allow(dead_code)]
36
-
mod placeholder_sql_types {
37
-
use diesel::query_builder::QueryId;
38
-
use diesel::sql_types::SqlType;
39
-
40
-
// Phase 7: Label composite types (will be added later)
41
-
#[derive(QueryId, SqlType)]
42
-
#[diesel(postgres_type(name = "post_label"))]
43
-
pub struct PostLabel;
44
-
45
-
#[derive(QueryId, SqlType)]
46
-
#[diesel(postgres_type(name = "actor_label"))]
47
-
pub struct ActorLabel;
48
-
}
49
-
50
-
use placeholder_sql_types::*;
51
33
use crate::types::{
52
34
ImageMimeType, VideoMimeType, FacetType, CaptionMimeType, LanguageCode,
53
-
LabelSeverity, LabelBlurs, LabelDefaultSetting,
35
+
LabelSeverity, LabelBlurs, LabelDefaultSetting, // For labeler_def_record
54
36
};
55
37
use chrono::{DateTime, Utc};
56
38
use diesel::deserialize::{self, FromSql};
+4
parakeet-db/src/models.rs
+4
parakeet-db/src/models.rs
···
176
176
// Phase 6: RKey arrays for quick lookups
177
177
pub post_rkeys: Option<Vec<Option<i64>>>, // All post rkeys owned by this actor
178
178
pub repost_rkeys: Option<Vec<Option<i64>>>, // All repost rkeys owned by this actor
179
+
// Phase 7: Labels array (from labels table, denormalized)
180
+
pub labels: Option<Vec<Option<crate::composite_types::ActorLabelRecord>>>, // Labels applied to this actor
179
181
}
180
182
181
183
// AllowlistEntry model removed - allowlist table dropped in favor of actors.sync_state
···
254
256
pub postgate_rules: Option<array_helpers::PostgateRuleArray>, // Rules (maxLength: 5)
255
257
pub postgate_detached_actor_ids: Option<Vec<Option<i32>>>, // Detached embed authors (maxLength: 50)
256
258
pub postgate_detached_rkeys: Option<Vec<Option<i64>>>, // Detached embed rkeys (parallel array)
259
+
// Phase 7: Labels array (from labels table, denormalized)
260
+
pub labels: Option<Vec<Option<crate::composite_types::PostLabelRecord>>>, // Labels applied to this post
257
261
// Note: created_at derived from TID rkey via created_at() method
258
262
}
259
263
+12
-15
parakeet-db/src/schema.rs
+12
-15
parakeet-db/src/schema.rs
···
2
2
3
3
pub mod sql_types {
4
4
#[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)]
5
+
#[diesel(postgres_type(name = "actor_label"))]
6
+
pub struct ActorLabel;
7
+
8
+
#[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)]
5
9
#[diesel(postgres_type(name = "actor_status"))]
6
10
pub struct ActorStatus;
7
11
···
114
118
pub struct PostImageEmbed;
115
119
116
120
#[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)]
121
+
#[diesel(postgres_type(name = "post_label"))]
122
+
pub struct PostLabel;
123
+
124
+
#[derive(diesel::query_builder::QueryId, diesel::sql_types::SqlType)]
117
125
#[diesel(postgres_type(name = "post_stat_type"))]
118
126
pub struct PostStatType;
119
127
···
206
214
use super::sql_types::ThreadMuteRecord;
207
215
use super::sql_types::ListMuteRecord;
208
216
use super::sql_types::ListBlockRecord;
217
+
use super::sql_types::ActorLabel;
209
218
210
219
actors (id) {
211
220
id -> Int4,
···
266
275
list_blocks -> Nullable<Array<Nullable<ListBlockRecord>>>,
267
276
post_rkeys -> Nullable<Array<Nullable<Int8>>>,
268
277
repost_rkeys -> Nullable<Array<Nullable<Int8>>>,
278
+
labels -> Nullable<Array<Nullable<ActorLabel>>>,
269
279
}
270
280
}
271
281
···
362
372
}
363
373
364
374
diesel::table! {
365
-
labels (labeler_actor_id, label, uri) {
366
-
labeler_actor_id -> Int4,
367
-
label -> Text,
368
-
uri -> Text,
369
-
self_label -> Bool,
370
-
cid -> Nullable<Bytea>,
371
-
negated -> Bool,
372
-
expires -> Nullable<Timestamptz>,
373
-
sig -> Nullable<Bytea>,
374
-
created_at -> Timestamptz,
375
-
}
376
-
}
377
-
378
-
diesel::table! {
379
375
list_items (actor_id, rkey) {
380
376
actor_id -> Int4,
381
377
rkey -> Int8,
···
449
445
use super::sql_types::PostFacetEmbed;
450
446
use super::sql_types::ThreadgateRule;
451
447
use super::sql_types::PostgateRule;
448
+
use super::sql_types::PostLabel;
452
449
453
450
posts (actor_id, rkey) {
454
451
actor_id -> Int4,
···
499
496
postgate_rules -> Nullable<Array<Nullable<PostgateRule>>>,
500
497
postgate_detached_actor_ids -> Nullable<Array<Nullable<Int4>>>,
501
498
postgate_detached_rkeys -> Nullable<Array<Nullable<Int8>>>,
499
+
labels -> Nullable<Array<Nullable<PostLabel>>>,
502
500
}
503
501
}
504
502
···
591
589
fetch_queue,
592
590
handle_resolution_queue,
593
591
jetstream_cursors,
594
-
labels,
595
592
list_items,
596
593
lists,
597
594
notifications,
+2
parakeet-db/src/types.rs
+2
parakeet-db/src/types.rs
···
453
453
Chat = "chat",
454
454
});
455
455
456
+
// Label configuration enums (used in labeler_def_record composite type)
457
+
// Note: These are different from label instance data (post_label, actor_label)
456
458
diesel_enum!(LabelSeverity, LabelSeverity, {
457
459
Inform = "inform",
458
460
Alert = "alert",
+2
parakeet/src/loaders/post.rs
+2
parakeet/src/loaders/post.rs