+6
-1
consumer/src/backfill/mod.rs
+6
-1
consumer/src/backfill/mod.rs
···
275
275
follows: Vec<(String, String, DateTime<Utc>)>,
276
276
list_items: Vec<(String, records::AppBskyGraphListItem)>,
277
277
verifications: Vec<(String, Cid, records::AppBskyGraphVerification)>,
278
+
threadgates: Vec<(String, Cid, records::AppBskyFeedThreadgate)>, // not COPY'd but needs to be kept until last.
278
279
records: Vec<(String, Cid)>,
279
280
}
280
281
281
282
impl CopyStore {
282
283
async fn submit(self, t: &mut Transaction<'_>, did: &str) -> Result<(), tokio_postgres::Error> {
283
284
db::copy::copy_likes(t, did, self.likes).await?;
284
-
db::copy::copy_posts(t, did, self.posts).await?;
285
285
db::copy::copy_reposts(t, did, self.reposts).await?;
286
286
db::copy::copy_blocks(t, did, self.blocks).await?;
287
287
db::copy::copy_follows(t, did, self.follows).await?;
288
288
db::copy::copy_list_items(t, self.list_items).await?;
289
289
db::copy::copy_verification(t, did, self.verifications).await?;
290
+
db::copy::copy_posts(t, did, self.posts).await?;
291
+
for (at_uri, cid, record) in self.threadgates {
292
+
db::threadgate_enforce_backfill(t, did, &record).await?;
293
+
db::threadgate_upsert(t, &at_uri, cid, record).await?;
294
+
}
290
295
db::copy::copy_records(t, did, self.records).await?;
291
296
292
297
Ok(())
+11
-1
consumer/src/backfill/repo.rs
+11
-1
consumer/src/backfill/repo.rs
···
4
4
};
5
5
use crate::indexer::records;
6
6
use crate::indexer::types::{AggregateDeltaStore, RecordTypes};
7
+
use crate::utils::at_uri_is_by;
7
8
use crate::{db, indexer};
8
9
use deadpool_postgres::Transaction;
9
10
use ipld_core::cid::Cid;
···
144
145
db::maintain_self_labels(t, did, Some(cid), &at_uri, labels).await?;
145
146
}
146
147
if let Some(embed) = rec.embed.clone().and_then(|embed| embed.into_bsky()) {
147
-
db::post_embed_insert(t, &at_uri, embed, rec.created_at).await?;
148
+
db::post_embed_insert(t, &at_uri, embed, rec.created_at, true).await?;
148
149
}
149
150
150
151
deltas.incr(did, AggregateType::ProfilePost).await;
···
165
166
copies
166
167
.reposts
167
168
.push((rkey.to_string(), rec.subject, rec.via, rec.created_at));
169
+
}
170
+
RecordTypes::AppBskyFeedThreadgate(record) => {
171
+
if !at_uri_is_by(&record.post, did) {
172
+
tracing::warn!("tried to create a threadgate on a post we don't control!");
173
+
return Ok(());
174
+
}
175
+
176
+
copies.push_record(&at_uri, cid);
177
+
copies.threadgates.push((at_uri, cid, record));
168
178
}
169
179
RecordTypes::AppBskyGraphBlock(rec) => {
170
180
copies.push_record(&at_uri, cid);
+38
-3
consumer/src/db/copy.rs
+38
-3
consumer/src/db/copy.rs
···
1
1
use super::PgExecResult;
2
2
use crate::indexer::records;
3
-
use crate::utils::strongref_to_parts;
3
+
use crate::utils::{extract_mentions_and_tags, merge_tags, strongref_to_parts};
4
4
use chrono::prelude::*;
5
5
use deadpool_postgres::Transaction;
6
6
use futures::pin_mut;
···
119
119
.await
120
120
}
121
121
122
-
const POST_STMT: &str = "COPY posts_tmp (at_uri, cid, did, record, content, facets, languages, tags, parent_uri, parent_cid, root_uri, root_cid, embed, embed_subtype, created_at) FROM STDIN (FORMAT binary)";
122
+
const POST_STMT: &str = "COPY posts_tmp (at_uri, cid, did, record, content, facets, languages, tags, parent_uri, parent_cid, root_uri, root_cid, embed, embed_subtype, mentions, created_at) FROM STDIN (FORMAT binary)";
123
123
const POST_TYPES: &[Type] = &[
124
124
Type::TEXT,
125
125
Type::TEXT,
···
135
135
Type::TEXT,
136
136
Type::TEXT,
137
137
Type::TEXT,
138
+
Type::TEXT_ARRAY,
138
139
Type::TIMESTAMP,
139
140
];
140
141
pub async fn copy_posts(
···
159
160
160
161
for (at_uri, cid, post) in data {
161
162
let record = serde_json::to_value(&post).unwrap();
163
+
let (mentions, tags) = post
164
+
.facets
165
+
.as_ref()
166
+
.map(|v| extract_mentions_and_tags(v))
167
+
.unzip();
162
168
let facets = post.facets.and_then(|v| serde_json::to_value(v).ok());
163
169
let embed = post.embed.as_ref().map(|v| v.as_str());
164
170
let embed_subtype = post.embed.as_ref().and_then(|v| v.subtype());
165
171
let (parent_uri, parent_cid) = strongref_to_parts(post.reply.as_ref().map(|v| &v.parent));
166
172
let (root_uri, root_cid) = strongref_to_parts(post.reply.as_ref().map(|v| &v.root));
173
+
174
+
let tags = merge_tags(tags, post.tags);
167
175
168
176
let writer = writer.as_mut();
169
177
writer
···
175
183
&post.text,
176
184
&facets,
177
185
&post.langs.unwrap_or_default(),
178
-
&post.tags.unwrap_or_default(),
186
+
&tags,
179
187
&parent_uri,
180
188
&parent_cid,
181
189
&root_uri,
182
190
&root_cid,
183
191
&embed,
184
192
&embed_subtype,
193
+
&mentions,
185
194
&post.created_at.naive_utc(),
186
195
])
187
196
.await?;
188
197
}
189
198
190
199
writer.finish().await?;
200
+
201
+
let threadgated: Vec<(String, String, DateTime<Utc>)> = conn
202
+
.query(
203
+
"SELECT root_uri, p.at_uri, p.created_at FROM posts_tmp p INNER JOIN threadgates t ON root_uri = post_uri WHERE t.allow IS NOT NULL",
204
+
&[],
205
+
)
206
+
.await?
207
+
.into_iter()
208
+
.map(|v| (v.get(0), v.get(1), v.get(2))).collect();
209
+
210
+
for (root, post, created_at) in threadgated {
211
+
match super::post_enforce_threadgate(conn, &root, did, created_at, true).await {
212
+
Ok(true) => {
213
+
conn.execute(
214
+
"UPDATE posts_tmp SET violates_threadgate=TRUE WHERE at_uri=$1",
215
+
&[&post],
216
+
)
217
+
.await?;
218
+
}
219
+
Ok(false) => continue,
220
+
Err(e) => {
221
+
tracing::error!("failed to check threadgate enforcement: {e}");
222
+
continue;
223
+
}
224
+
}
225
+
}
191
226
192
227
conn.execute("INSERT INTO posts (SELECT * FROM posts_tmp)", &[])
193
228
.await
+208
consumer/src/db/gates.rs
+208
consumer/src/db/gates.rs
···
1
+
use super::{PgExecResult, PgResult};
2
+
use crate::indexer::records::{
3
+
AppBskyFeedThreadgate, ThreadgateRule, THREADGATE_RULE_FOLLOWER, THREADGATE_RULE_FOLLOWING,
4
+
THREADGATE_RULE_LIST, THREADGATE_RULE_MENTION,
5
+
};
6
+
use chrono::prelude::*;
7
+
use chrono::{DateTime, Utc};
8
+
use deadpool_postgres::GenericClient;
9
+
use std::collections::HashSet;
10
+
11
+
pub async fn post_enforce_threadgate<C: GenericClient>(
12
+
conn: &mut C,
13
+
root: &str,
14
+
post_author: &str,
15
+
post_created_at: DateTime<Utc>,
16
+
is_backfill: bool,
17
+
) -> PgResult<bool> {
18
+
// check if the root and the current post are the same author
19
+
// strip "at://" then break into parts by '/'
20
+
let parts = root[5..].split('/').collect::<Vec<_>>();
21
+
let root_author = parts[0];
22
+
if root_author == post_author {
23
+
return Ok(false);
24
+
}
25
+
26
+
let tg_data = super::threadgate_get(conn, root).await?;
27
+
28
+
let Some((created_at, allow, allow_lists)) = tg_data else {
29
+
return Ok(false);
30
+
};
31
+
32
+
// when backfilling, there's no point continuing if the record is dated before the threadgate
33
+
if is_backfill && post_created_at < created_at {
34
+
return Ok(false);
35
+
}
36
+
37
+
if allow.is_empty() {
38
+
return Ok(true);
39
+
}
40
+
41
+
let allow: HashSet<String> = HashSet::from_iter(allow);
42
+
43
+
if allow.contains(THREADGATE_RULE_FOLLOWER) || allow.contains(THREADGATE_RULE_FOLLOWING) {
44
+
let profile_state: Option<(bool, bool)> = conn
45
+
.query_opt(
46
+
"SELECT following IS NOT NULL, followed IS NOT NULL FROM profile_states WHERE did=$1 AND subject=$2",
47
+
&[&root_author, &post_author],
48
+
)
49
+
.await?
50
+
.map(|v| (v.get(0), v.get(1)));
51
+
52
+
if let Some((following, followed)) = profile_state {
53
+
if allow.contains(THREADGATE_RULE_FOLLOWER) && followed {
54
+
return Ok(false);
55
+
}
56
+
57
+
if allow.contains(THREADGATE_RULE_FOLLOWING) && following {
58
+
return Ok(false);
59
+
}
60
+
}
61
+
}
62
+
63
+
// check mentions
64
+
if allow.contains(THREADGATE_RULE_MENTION) {
65
+
let mentions: Vec<String> = conn
66
+
.query_opt("SELECT mentions FROM posts WHERE at_uri=$1", &[&root])
67
+
.await?
68
+
.map(|r| r.get(0))
69
+
.unwrap_or_default();
70
+
71
+
if mentions.contains(&post_author.to_owned()) {
72
+
return Ok(false);
73
+
}
74
+
}
75
+
76
+
if allow.contains(THREADGATE_RULE_LIST) {
77
+
if allow_lists.is_empty() {
78
+
return Ok(true);
79
+
}
80
+
81
+
let count: i64 = conn
82
+
.query_one(
83
+
"SELECT count(*) FROM list_items WHERE list_uri=ANY($1) AND subject=$2",
84
+
&[&allow_lists, &post_author],
85
+
)
86
+
.await?
87
+
.get(0);
88
+
if count != 0 {
89
+
return Ok(false);
90
+
}
91
+
}
92
+
93
+
Ok(true)
94
+
}
95
+
96
+
pub async fn postgate_maintain_detaches<C: GenericClient>(
97
+
conn: &mut C,
98
+
post: &str,
99
+
detached: &[String],
100
+
disable_effective: Option<NaiveDateTime>,
101
+
) -> PgExecResult {
102
+
conn.execute(
103
+
"SELECT maintain_postgates($1, $2, $3)",
104
+
&[&post, &detached, &disable_effective],
105
+
)
106
+
.await
107
+
}
108
+
109
+
// variant of post_enforce_threadgate that runs when backfilling to clean up any posts already in DB
110
+
pub async fn threadgate_enforce_backfill<C: GenericClient>(
111
+
conn: &mut C,
112
+
root_author: &str,
113
+
threadgate: &AppBskyFeedThreadgate,
114
+
) -> PgExecResult {
115
+
// pull out allow - if it's None we can skip this gate.
116
+
let Some(allow) = threadgate.allow.as_ref() else {
117
+
return Ok(0);
118
+
};
119
+
120
+
let root = &threadgate.post;
121
+
122
+
if allow.is_empty() {
123
+
// blind update everything
124
+
return conn.execute(
125
+
"UPDATE posts SET violates_threadgate=TRUE WHERE root_uri=$1 AND did != $2 AND created_at >= $3",
126
+
&[&root, &root_author, &threadgate.created_at],
127
+
).await;
128
+
}
129
+
130
+
// pull authors with our root_uri where the author is not the root author and are dated after created_at
131
+
// this is mutable because we'll remove ALLOWED dids
132
+
let mut dids: HashSet<String> = conn
133
+
.query(
134
+
"SELECT DISTINCT did FROM posts WHERE root_uri=$1 AND did != $2 AND created_at >= $3",
135
+
&[&root, &root_author, &threadgate.created_at],
136
+
)
137
+
.await?
138
+
.into_iter()
139
+
.map(|row| row.get(0))
140
+
.collect();
141
+
142
+
// this will be empty if there are no replies.
143
+
if dids.is_empty() {
144
+
return Ok(0);
145
+
}
146
+
147
+
let allowed_lists = allow
148
+
.iter()
149
+
.filter_map(|rule| match rule {
150
+
ThreadgateRule::List { list } => Some(list),
151
+
_ => None,
152
+
})
153
+
.collect::<Vec<_>>();
154
+
155
+
let allow: HashSet<_> = HashSet::from_iter(allow.into_iter().map(|v| v.as_str()));
156
+
157
+
if allow.contains(THREADGATE_RULE_FOLLOWER) && !dids.is_empty() {
158
+
let current_dids: Vec<_> = dids.iter().collect();
159
+
160
+
let res = conn.query(
161
+
"SELECT subject FROM profile_states WHERE did=$1 AND subject=ANY($2) AND followed IS NOT NULL",
162
+
&[&root_author, ¤t_dids]
163
+
).await?;
164
+
165
+
dids = &dids - &HashSet::from_iter(res.into_iter().map(|r| r.get(0)));
166
+
}
167
+
168
+
if allow.contains(THREADGATE_RULE_FOLLOWING) && !dids.is_empty() {
169
+
let current_dids: Vec<_> = dids.iter().collect();
170
+
171
+
let res = conn.query(
172
+
"SELECT subject FROM profile_states WHERE did=$1 AND subject=ANY($2) AND following IS NOT NULL",
173
+
&[&root_author, ¤t_dids]
174
+
).await?;
175
+
176
+
dids = &dids - &HashSet::from_iter(res.into_iter().map(|r| r.get(0)));
177
+
}
178
+
179
+
if allow.contains(THREADGATE_RULE_MENTION) && !dids.is_empty() {
180
+
let mentions: Vec<String> = conn
181
+
.query_opt("SELECT mentions FROM posts WHERE at_uri=$1", &[&root])
182
+
.await?
183
+
.map(|r| r.get(0))
184
+
.unwrap_or_default();
185
+
186
+
dids = &dids - &HashSet::from_iter(mentions);
187
+
}
188
+
189
+
if allow.contains(THREADGATE_RULE_LIST) && !dids.is_empty() {
190
+
let current_dids: Vec<_> = dids.iter().collect();
191
+
192
+
let res = conn
193
+
.query(
194
+
"SELECT subject FROM list_items WHERE list_uri = ANY($1) AND subject = ANY($2)",
195
+
&[&allowed_lists, ¤t_dids],
196
+
)
197
+
.await?;
198
+
199
+
dids = &dids - &HashSet::from_iter(res.into_iter().map(|r| r.get(0)));
200
+
}
201
+
202
+
let dids = dids.into_iter().collect::<Vec<_>>();
203
+
204
+
conn.execute(
205
+
"UPDATE posts SET violates_threadgate=TRUE WHERE root_uri = $1 AND did = ANY($2) AND created_at >= $3",
206
+
&[&threadgate.post, &dids, &threadgate.created_at]
207
+
).await
208
+
}
+2
consumer/src/db/mod.rs
+2
consumer/src/db/mod.rs
+70
-28
consumer/src/db/record.rs
+70
-28
consumer/src/db/record.rs
···
1
1
use super::{PgExecResult, PgOptResult, PgResult};
2
2
use crate::indexer::records::*;
3
-
use crate::utils::{blob_ref, strongref_to_parts};
3
+
use crate::utils::{blob_ref, extract_mentions_and_tags, merge_tags, strongref_to_parts};
4
4
use chrono::prelude::*;
5
5
use deadpool_postgres::GenericClient;
6
6
use ipld_core::cid::Cid;
7
7
use lexica::community_lexicon::bookmarks::Bookmark;
8
+
use std::collections::HashSet;
8
9
9
10
pub async fn record_upsert<C: GenericClient>(
10
11
conn: &mut C,
···
317
318
repo: &str,
318
319
cid: Cid,
319
320
rec: AppBskyFeedPost,
321
+
is_backfill: bool,
320
322
) -> PgExecResult {
321
323
let cid = cid.to_string();
322
324
let record = serde_json::to_value(&rec).unwrap();
325
+
let (mentions, tags) = rec
326
+
.facets
327
+
.as_ref()
328
+
.map(|v| extract_mentions_and_tags(v))
329
+
.unzip();
323
330
let facets = rec.facets.and_then(|v| serde_json::to_value(v).ok());
324
331
let (parent_uri, parent_cid) = strongref_to_parts(rec.reply.as_ref().map(|v| &v.parent));
325
332
let (root_uri, root_cid) = strongref_to_parts(rec.reply.as_ref().map(|v| &v.root));
326
333
let embed = rec.embed.as_ref().map(|v| v.as_str());
327
334
let embed_subtype = rec.embed.as_ref().and_then(|v| v.subtype());
328
335
336
+
// if there is a root, we need to check for the presence of a threadgate.
337
+
let violates_threadgate = match &root_uri {
338
+
Some(root) => {
339
+
super::post_enforce_threadgate(conn, root, repo, rec.created_at, is_backfill).await?
340
+
}
341
+
None => false,
342
+
};
343
+
344
+
let tags = merge_tags(tags, rec.tags);
345
+
329
346
let count = conn
330
347
.execute(
331
348
include_str!("sql/post_insert.sql"),
···
337
354
&rec.text,
338
355
&facets,
339
356
&rec.langs.unwrap_or_default(),
340
-
&rec.tags.unwrap_or_default(),
357
+
&tags,
341
358
&parent_uri,
342
359
&parent_cid,
343
360
&root_uri,
344
361
&root_cid,
345
362
&embed,
346
363
&embed_subtype,
364
+
&mentions,
365
+
&violates_threadgate,
347
366
&rec.created_at,
348
367
],
349
368
)
350
369
.await?;
351
370
352
371
if let Some(embed) = rec.embed.and_then(|embed| embed.into_bsky()) {
353
-
post_embed_insert(conn, at_uri, embed, rec.created_at).await?;
372
+
post_embed_insert(conn, at_uri, embed, rec.created_at, is_backfill).await?;
354
373
}
355
374
356
375
Ok(count)
···
380
399
post: &str,
381
400
embed: AppBskyEmbed,
382
401
created_at: DateTime<Utc>,
402
+
is_backfill: bool,
383
403
) -> PgExecResult {
384
404
match embed {
385
405
AppBskyEmbed::Images(embed) => post_embed_image_insert(conn, post, embed).await,
386
406
AppBskyEmbed::Video(embed) => post_embed_video_insert(conn, post, embed).await,
387
407
AppBskyEmbed::External(embed) => post_embed_external_insert(conn, post, embed).await,
388
408
AppBskyEmbed::Record(embed) => {
389
-
post_embed_record_insert(conn, post, embed, created_at).await
409
+
post_embed_record_insert(conn, post, embed, created_at, is_backfill).await
390
410
}
391
411
AppBskyEmbed::RecordWithMedia(embed) => {
392
-
post_embed_record_insert(conn, post, embed.record, created_at).await?;
412
+
post_embed_record_insert(conn, post, embed.record, created_at, is_backfill).await?;
393
413
match *embed.media {
394
414
AppBskyEmbed::Images(embed) => post_embed_image_insert(conn, post, embed).await,
395
415
AppBskyEmbed::Video(embed) => post_embed_video_insert(conn, post, embed).await,
···
476
496
).await
477
497
}
478
498
499
+
const PG_DISABLE_RULE: &str = "app.bsky.feed.postgate#disableRule";
479
500
async fn post_embed_record_insert<C: GenericClient>(
480
501
conn: &mut C,
481
502
post: &str,
482
503
embed: AppBskyEmbedRecord,
483
504
post_created_at: DateTime<Utc>,
505
+
is_backfill: bool,
484
506
) -> PgExecResult {
485
507
// strip "at://" then break into parts by '/'
486
508
let parts = embed.record.uri[5..].split('/').collect::<Vec<_>>();
487
509
488
510
let detached = if parts[1] == "app.bsky.feed.post" {
489
-
let postgate_effective: Option<DateTime<Utc>> = conn
490
-
.query_opt(
491
-
"SELECT created_at FROM postgates WHERE post_uri=$1",
492
-
&[&post],
493
-
)
494
-
.await?
495
-
.map(|v| v.get(0));
511
+
let pg_data = postgate_get(conn, post).await?;
496
512
497
-
postgate_effective
498
-
.map(|v| Utc::now().min(post_created_at) > v)
499
-
.unwrap_or_default()
513
+
if let Some((effective, detached, rules)) = pg_data {
514
+
let detached: HashSet<String> = HashSet::from_iter(detached);
515
+
let rules: HashSet<String> = HashSet::from_iter(rules);
516
+
let compare_date = match is_backfill {
517
+
true => post_created_at,
518
+
false => Utc::now(),
519
+
};
520
+
521
+
detached.contains(post) || (rules.contains(PG_DISABLE_RULE) && compare_date > effective)
522
+
} else {
523
+
false
524
+
}
500
525
} else {
501
526
false
502
527
};
···
505
530
"INSERT INTO post_embed_record (post_uri, record_type, uri, cid, detached) VALUES ($1, $2, $3, $4, $5)",
506
531
&[&post, &parts[1], &embed.record.uri, &embed.record.cid.to_string(), &detached],
507
532
).await
533
+
}
534
+
535
+
/// Looks up the postgate row for `post`.
///
/// Yields `Ok(None)` when no row matches, otherwise the row's
/// `(created_at, detached, rules)` columns in that order.
// NOTE(review): `Row::get` panics on NULL for non-`Option` targets - confirm `detached` and
// `rules` are NOT NULL in the `postgates` table.
async fn postgate_get<C: GenericClient>(
    conn: &mut C,
    post: &str,
) -> PgOptResult<(DateTime<Utc>, Vec<String>, Vec<String>)> {
    let Some(row) = conn
        .query_opt(
            "SELECT created_at, detached, rules FROM postgates WHERE post_uri=$1",
            &[&post],
        )
        .await?
    else {
        return Ok(None);
    };

    Ok(Some((row.get(0), row.get(1), row.get(2))))
}
509
549
510
550
pub async fn postgate_upsert<C: GenericClient>(
···
536
576
pub async fn postgate_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgExecResult {
537
577
conn.execute("DELETE FROM postgates WHERE at_uri=$1", &[&at_uri])
538
578
.await
539
-
}
540
-
541
-
pub async fn postgate_maintain_detaches<C: GenericClient>(
542
-
conn: &mut C,
543
-
post: &str,
544
-
detached: &[String],
545
-
disable_effective: Option<NaiveDateTime>,
546
-
) -> PgExecResult {
547
-
conn.execute(
548
-
"SELECT maintain_postgates($1, $2, $3)",
549
-
&[&post, &detached, &disable_effective],
550
-
)
551
-
.await
552
579
}
553
580
554
581
pub async fn profile_upsert<C: GenericClient>(
···
698
725
pub async fn status_delete<C: GenericClient>(conn: &mut C, did: &str) -> PgExecResult {
699
726
conn.execute("DELETE FROM statuses WHERE did=$1", &[&did])
700
727
.await
728
+
}
729
+
730
+
/// Looks up the threadgate attached to `post`, ignoring gates whose `allow` is NULL.
///
/// Yields `Ok(None)` when no such row exists, otherwise the row's
/// `(created_at, allow, allowed_lists)` columns in that order.
// NOTE(review): `Row::get` panics on NULL for non-`Option` targets - confirm `allowed_lists`
// is NOT NULL in the `threadgates` table.
pub async fn threadgate_get<C: GenericClient>(
    conn: &mut C,
    post: &str,
) -> PgOptResult<(DateTime<Utc>, Vec<String>, Vec<String>)> {
    let Some(row) = conn
        .query_opt(
            "SELECT created_at, allow, allowed_lists FROM threadgates WHERE post_uri=$1 AND allow IS NOT NULL",
            &[&post],
        )
        .await?
    else {
        return Ok(None);
    };

    Ok(Some((row.get(0), row.get(1), row.get(2))))
}
702
744
703
745
pub async fn threadgate_upsert<C: GenericClient>(
+2
-2
consumer/src/db/sql/post_insert.sql
+2
-2
consumer/src/db/sql/post_insert.sql
···
1
1
INSERT INTO posts (at_uri, did, cid, record, content, facets, languages, tags, parent_uri, parent_cid, root_uri,
2
-
root_cid, embed, embed_subtype, created_at)
3
-
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15)
2
+
root_cid, embed, embed_subtype, mentions, violates_threadgate, created_at)
3
+
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17)
4
4
ON CONFLICT DO NOTHING
+1
-1
consumer/src/indexer/mod.rs
+1
-1
consumer/src/indexer/mod.rs
···
625
625
});
626
626
627
627
let labels = record.labels.clone();
628
-
db::post_insert(conn, at_uri, repo, cid, record).await?;
628
+
db::post_insert(conn, at_uri, repo, cid, record, false).await?;
629
629
if let Some(labels) = labels {
630
630
db::maintain_self_labels(conn, repo, Some(cid), at_uri, labels).await?;
631
631
}
+9
-4
consumer/src/indexer/records.rs
+9
-4
consumer/src/indexer/records.rs
···
272
272
pub hidden_replies: Vec<String>,
273
273
}
274
274
275
+
pub const THREADGATE_RULE_MENTION: &str = "app.bsky.feed.threadgate#mentionRule";
276
+
pub const THREADGATE_RULE_FOLLOWER: &str = "app.bsky.feed.threadgate#followerRule";
277
+
pub const THREADGATE_RULE_FOLLOWING: &str = "app.bsky.feed.threadgate#followingRule";
278
+
pub const THREADGATE_RULE_LIST: &str = "app.bsky.feed.threadgate#listRule";
279
+
275
280
#[derive(Debug, Deserialize, Serialize)]
276
281
#[serde(tag = "$type")]
277
282
pub enum ThreadgateRule {
···
288
293
impl ThreadgateRule {
289
294
pub fn as_str(&self) -> &'static str {
290
295
match self {
291
-
ThreadgateRule::Mention => "app.bsky.feed.threadgate#mentionRule",
292
-
ThreadgateRule::Follower => "app.bsky.feed.threadgate#followerRule",
293
-
ThreadgateRule::Following => "app.bsky.feed.threadgate#followingRule",
294
-
ThreadgateRule::List { .. } => "app.bsky.feed.threadgate#listRule",
296
+
ThreadgateRule::Mention => THREADGATE_RULE_MENTION,
297
+
ThreadgateRule::Follower => THREADGATE_RULE_FOLLOWER,
298
+
ThreadgateRule::Following => THREADGATE_RULE_FOLLOWING,
299
+
ThreadgateRule::List { .. } => THREADGATE_RULE_LIST,
295
300
}
296
301
}
297
302
}
+31
consumer/src/utils.rs
+31
consumer/src/utils.rs
···
1
+
use lexica::app_bsky::richtext::{Facet, FacetMain, FacetOuter};
1
2
use lexica::{Blob, StrongRef};
2
3
use serde::{Deserialize, Deserializer};
3
4
···
39
40
40
41
did == split_aturi[2]
41
42
}
43
+
44
+
pub fn extract_mentions_and_tags(from: &[FacetMain]) -> (Vec<String>, Vec<String>) {
45
+
let (mentions, tags) = from
46
+
.iter()
47
+
.flat_map(|v| {
48
+
v.features.iter().map(|facet| match facet {
49
+
FacetOuter::Bsky(Facet::Mention { did }) => (Some(did), None),
50
+
FacetOuter::Bsky(Facet::Tag { tag }) => (None, Some(tag)),
51
+
_ => (None, None),
52
+
})
53
+
})
54
+
.unzip::<_, _, Vec<_>, Vec<_>>();
55
+
56
+
let mentions = mentions.into_iter().flatten().cloned().collect();
57
+
let tags = tags.into_iter().flatten().cloned().collect();
58
+
59
+
(mentions, tags)
60
+
}
61
+
62
+
/// Concatenates two optional lists: the items of `t1` first, then the items of `t2`.
///
/// A missing list contributes nothing; when both are missing the result is empty.
pub fn merge_tags<T>(t1: Option<Vec<T>>, t2: Option<Vec<T>>) -> Vec<T> {
    match t1 {
        Some(mut merged) => {
            merged.extend(t2.into_iter().flatten());
            merged
        }
        None => t2.unwrap_or_default(),
    }
}
+15
migrations/2025-09-27-171241_post-tweaks/down.sql
+15
migrations/2025-09-27-171241_post-tweaks/down.sql
···
1
+
alter table posts
2
+
drop column mentions,
3
+
drop column violates_threadgate;
4
+
5
+
drop trigger t_author_feed_ins_post on posts;
6
+
drop trigger t_author_feed_del_post on posts;
7
+
drop trigger t_author_feed_ins_repost on reposts;
8
+
drop trigger t_author_feed_del_repost on reposts;
9
+
10
+
drop function f_author_feed_ins_post;
11
+
drop function f_author_feed_del_post;
12
+
drop function f_author_feed_ins_repost;
13
+
drop function f_author_feed_del_repost;
14
+
15
+
drop table author_feeds;
+79
migrations/2025-09-27-171241_post-tweaks/up.sql
+79
migrations/2025-09-27-171241_post-tweaks/up.sql
···
1
+
alter table posts
2
+
add column mentions text[],
3
+
add column violates_threadgate bool not null default false;
4
+
5
+
create table author_feeds
6
+
(
7
+
uri text primary key,
8
+
cid text not null,
9
+
post text not null,
10
+
did text not null,
11
+
typ text not null,
12
+
sort_at timestamptz not null
13
+
);
14
+
15
+
-- author_feeds post triggers
16
+
create function f_author_feed_ins_post() returns trigger
17
+
language plpgsql as
18
+
$$
19
+
begin
20
+
insert into author_feeds (uri, cid, post, did, typ, sort_at)
21
+
VALUES (NEW.at_uri, NEW.cid, NEW.at_uri, NEW.did, 'post', NEW.created_at)
22
+
on conflict do nothing;
23
+
return NEW;
24
+
end;
25
+
$$;
26
+
27
+
create trigger t_author_feed_ins_post
28
+
before insert
29
+
on posts
30
+
for each row
31
+
execute procedure f_author_feed_ins_post();
32
+
33
+
-- Removes the author_feeds entry of a post when the post row is deleted.
create function f_author_feed_del_post() returns trigger
    language plpgsql as
$$
begin
    -- author_feeds has no "item" column; the post's AT-URI is stored in "post"
    -- (see f_author_feed_ins_post), so filter on that column instead.
    delete from author_feeds where did = OLD.did and post = OLD.at_uri and typ = 'post';
    return OLD;
end;
$$;
41
+
42
+
create trigger t_author_feed_del_post
43
+
before delete
44
+
on posts
45
+
for each row
46
+
execute procedure f_author_feed_del_post();
47
+
48
+
-- author_feeds repost triggers
49
+
-- Adds an author_feeds entry for every newly inserted repost.
create function f_author_feed_ins_repost() returns trigger
    language plpgsql as
$$
begin
    -- The repost's own AT-URI is at://<did>/<collection>/<rkey>; the '/' separators are
    -- required, otherwise the DID, collection and rkey run together into an invalid URI.
    insert into author_feeds (uri, cid, post, did, typ, sort_at)
    VALUES ('at://' || NEW.did || '/app.bsky.feed.repost/' || NEW.rkey, NEW.post_cid, NEW.post, NEW.did, 'repost', NEW.created_at)
    on conflict do nothing;
    return NEW;
end;
$$;
59
+
60
+
create trigger t_author_feed_ins_repost
61
+
before insert
62
+
on reposts
63
+
for each row
64
+
execute procedure f_author_feed_ins_repost();
65
+
66
+
-- Removes the author_feeds entry of a repost when the repost row is deleted.
create function f_author_feed_del_repost() returns trigger
    language plpgsql as
$$
begin
    -- author_feeds has no "item" column; the reposted post's AT-URI is stored in "post"
    -- (see f_author_feed_ins_repost), so filter on that column instead.
    delete from author_feeds where did = OLD.did and post = OLD.post and typ = 'repost';
    return OLD;
end;
$$;
74
+
75
+
create trigger t_author_feed_del_repost
76
+
before delete
77
+
on reposts
78
+
for each row
79
+
execute procedure f_author_feed_del_repost();
+16
parakeet-db/src/models.rs
+16
parakeet-db/src/models.rs
···
148
148
pub embed: Option<String>,
149
149
pub embed_subtype: Option<String>,
150
150
151
+
pub mentions: Option<Vec<Option<String>>>,
152
+
pub violates_threadgate: bool,
153
+
151
154
pub created_at: DateTime<Utc>,
152
155
pub indexed_at: NaiveDateTime,
153
156
}
···
414
417
pub subject_type: &'a str,
415
418
pub tags: Vec<String>,
416
419
}
420
+
421
+
#[derive(Debug, Queryable, Selectable, Identifiable)]
422
+
#[diesel(table_name = crate::schema::author_feeds)]
423
+
#[diesel(primary_key(uri))]
424
+
#[diesel(check_for_backend(diesel::pg::Pg))]
425
+
pub struct AuthorFeedItem {
426
+
pub uri: String,
427
+
pub cid: String,
428
+
pub post: String,
429
+
pub did: String,
430
+
pub typ: String,
431
+
pub sort_at: DateTime<Utc>,
432
+
}
+14
parakeet-db/src/schema.rs
+14
parakeet-db/src/schema.rs
···
13
13
}
14
14
15
15
diesel::table! {
16
+
author_feeds (uri) {
17
+
uri -> Text,
18
+
cid -> Text,
19
+
post -> Text,
20
+
did -> Text,
21
+
typ -> Text,
22
+
sort_at -> Timestamptz,
23
+
}
24
+
}
25
+
26
+
diesel::table! {
16
27
backfill (repo, repo_ver) {
17
28
repo -> Text,
18
29
repo_ver -> Text,
···
284
295
embed_subtype -> Nullable<Text>,
285
296
created_at -> Timestamptz,
286
297
indexed_at -> Timestamp,
298
+
mentions -> Nullable<Array<Nullable<Text>>>,
299
+
violates_threadgate -> Bool,
287
300
}
288
301
}
289
302
···
429
442
430
443
diesel::allow_tables_to_appear_in_same_query!(
431
444
actors,
445
+
author_feeds,
432
446
backfill,
433
447
backfill_jobs,
434
448
blocks,
+159
-80
parakeet/src/hydration/posts.rs
+159
-80
parakeet/src/hydration/posts.rs
···
3
3
use lexica::app_bsky::actor::ProfileViewBasic;
4
4
use lexica::app_bsky::embed::Embed;
5
5
use lexica::app_bsky::feed::{
6
-
BlockedAuthor, FeedViewPost, PostView, PostViewerState, ReplyRef, ReplyRefPost, ThreadgateView,
6
+
BlockedAuthor, FeedReasonRepost, FeedViewPost, FeedViewPostReason, PostView, PostViewerState,
7
+
ReplyRef, ReplyRefPost, ThreadgateView,
7
8
};
8
9
use lexica::app_bsky::graph::ListViewBasic;
9
10
use lexica::app_bsky::RecordStats;
···
32
33
}
33
34
}
34
35
36
+
type HydratePostsRet = (
37
+
models::Post,
38
+
ProfileViewBasic,
39
+
Vec<models::Label>,
40
+
Option<Embed>,
41
+
Option<ThreadgateView>,
42
+
Option<PostViewerState>,
43
+
Option<PostStats>,
44
+
);
45
+
35
46
fn build_postview(
36
-
post: models::Post,
37
-
author: ProfileViewBasic,
38
-
labels: Vec<models::Label>,
39
-
embed: Option<Embed>,
40
-
threadgate: Option<ThreadgateView>,
41
-
viewer: Option<PostViewerState>,
42
-
stats: Option<PostStats>,
47
+
(post, author, labels, embed, threadgate, viewer, stats): HydratePostsRet,
43
48
) -> PostView {
44
49
let stats = stats
45
50
.map(|stats| RecordStats {
···
135
140
let threadgate = self.hydrate_threadgate(threadgate).await;
136
141
let labels = self.get_label(&post.at_uri).await;
137
142
138
-
Some(build_postview(
143
+
Some(build_postview((
139
144
post, author, labels, embed, threadgate, viewer, stats,
140
-
))
145
+
)))
141
146
}
142
147
143
-
pub async fn hydrate_posts(&self, posts: Vec<String>) -> HashMap<String, PostView> {
148
+
async fn hydrate_posts_inner(&self, posts: Vec<String>) -> HashMap<String, HydratePostsRet> {
144
149
let stats = self.loaders.post_stats.load_many(posts.clone()).await;
145
150
let posts = self.loaders.posts.load_many(posts).await;
146
151
···
150
155
.unzip::<_, _, Vec<_>, Vec<_>>();
151
156
let authors = self.hydrate_profiles_basic(authors).await;
152
157
153
-
let post_labels = self.get_label_many(&post_uris).await;
154
-
let viewer_data = self.get_post_viewer_states(&post_uris).await;
158
+
let mut post_labels = self.get_label_many(&post_uris).await;
159
+
let mut viewer_data = self.get_post_viewer_states(&post_uris).await;
155
160
156
161
let threadgates = posts
157
162
.values()
···
159
164
.collect();
160
165
let threadgates = self.hydrate_threadgates(threadgates).await;
161
166
162
-
let embeds = self.hydrate_embeds(post_uris).await;
167
+
let mut embeds = self.hydrate_embeds(post_uris).await;
163
168
164
169
posts
165
170
.into_iter()
166
171
.filter_map(|(uri, (post, threadgate))| {
167
-
let author = authors.get(&post.did)?;
168
-
let embed = embeds.get(&uri).cloned();
172
+
let author = authors.get(&post.did)?.clone();
173
+
let embed = embeds.remove(&uri);
169
174
let threadgate = threadgate.and_then(|tg| threadgates.get(&tg.at_uri).cloned());
170
-
let labels = post_labels.get(&uri).cloned().unwrap_or_default();
175
+
let labels = post_labels.remove(&uri).unwrap_or_default();
171
176
let stats = stats.get(&uri).cloned();
172
-
let viewer = viewer_data.get(&uri).cloned();
177
+
let viewer = viewer_data.remove(&uri);
173
178
174
179
Some((
175
180
uri,
176
-
build_postview(
177
-
post,
178
-
author.to_owned(),
179
-
labels,
180
-
embed,
181
-
threadgate,
182
-
viewer,
183
-
stats,
184
-
),
181
+
(post, author, labels, embed, threadgate, viewer, stats),
185
182
))
186
183
})
187
184
.collect()
188
185
}
189
186
190
-
pub async fn hydrate_feed_posts(&self, posts: Vec<String>) -> HashMap<String, FeedViewPost> {
191
-
let stats = self.loaders.post_stats.load_many(posts.clone()).await;
192
-
let posts = self.loaders.posts.load_many(posts).await;
193
-
194
-
let (authors, post_uris) = posts
195
-
.values()
196
-
.map(|(post, _)| (post.did.clone(), post.at_uri.clone()))
197
-
.unzip::<_, _, Vec<_>, Vec<_>>();
198
-
let authors = self.hydrate_profiles_basic(authors).await;
187
+
pub async fn hydrate_posts(&self, posts: Vec<String>) -> HashMap<String, PostView> {
188
+
self.hydrate_posts_inner(posts)
189
+
.await
190
+
.into_iter()
191
+
.map(|(uri, data)| (uri, build_postview(data)))
192
+
.collect()
193
+
}
199
194
200
-
let post_labels = self.get_label_many(&post_uris).await;
201
-
let viewer_data = self.get_post_viewer_states(&post_uris).await;
202
-
let embeds = self.hydrate_embeds(post_uris).await;
195
+
pub async fn hydrate_feed_posts(
196
+
&self,
197
+
posts: Vec<RawFeedItem>,
198
+
author_threads_only: bool,
199
+
) -> Vec<FeedViewPost> {
200
+
let post_uris = posts
201
+
.iter()
202
+
.map(|item| item.post_uri().to_string())
203
+
.collect::<Vec<_>>();
204
+
let mut posts_hyd = self.hydrate_posts_inner(post_uris).await;
203
205
204
-
let reply_refs = posts
206
+
// we shouldn't show the parent when the post violates a threadgate.
207
+
let reply_refs = posts_hyd
205
208
.values()
206
-
.flat_map(|(post, _)| [post.parent_uri.clone(), post.root_uri.clone()])
209
+
.filter(|(post, ..)| !post.violates_threadgate)
210
+
.flat_map(|(post, ..)| [post.parent_uri.clone(), post.root_uri.clone()])
207
211
.flatten()
208
212
.collect::<Vec<_>>();
209
-
210
213
let reply_posts = self.hydrate_posts(reply_refs).await;
211
214
215
+
let repost_profiles = posts
216
+
.iter()
217
+
.filter_map(|item| item.repost_by())
218
+
.collect::<Vec<_>>();
219
+
let profiles_hydrated = self.hydrate_profiles_basic(repost_profiles).await;
220
+
212
221
posts
213
222
.into_iter()
214
-
.filter_map(|(post_uri, (post, _))| {
215
-
let author = authors.get(&post.did)?;
223
+
.filter_map(|item| {
224
+
let post = posts_hyd.remove(item.post_uri())?;
225
+
let context = item.context();
226
+
227
+
let reply = if let RawFeedItem::Post { .. } = item {
228
+
let root_uri = post.0.root_uri.as_ref();
229
+
let parent_uri = post.0.parent_uri.as_ref();
216
230
217
-
let root = post.root_uri.as_ref().and_then(|uri| reply_posts.get(uri));
218
-
let parent = post
219
-
.parent_uri
220
-
.as_ref()
221
-
.and_then(|uri| reply_posts.get(uri));
231
+
let (root, parent) = if author_threads_only {
232
+
if root_uri.is_some() && parent_uri.is_some() {
233
+
let root = root_uri.and_then(|uri| posts_hyd.get(uri))?;
234
+
let parent = parent_uri.and_then(|uri| posts_hyd.get(uri))?;
235
+
236
+
let root = build_postview(root.clone());
237
+
let parent = build_postview(parent.clone());
238
+
239
+
(Some(root), Some(parent))
240
+
} else {
241
+
(None, None)
242
+
}
243
+
} else {
244
+
let root = root_uri.and_then(|uri| reply_posts.get(uri)).cloned();
245
+
let parent = parent_uri.and_then(|uri| reply_posts.get(uri)).cloned();
246
+
247
+
(root, parent)
248
+
};
222
249
223
-
let reply = if post.parent_uri.is_some() && post.root_uri.is_some() {
224
-
Some(ReplyRef {
225
-
root: root.cloned().map(postview_to_replyref).unwrap_or(
226
-
ReplyRefPost::NotFound {
227
-
uri: post.root_uri.as_ref().unwrap().clone(),
228
-
not_found: true,
229
-
},
230
-
),
231
-
parent: parent.cloned().map(postview_to_replyref).unwrap_or(
232
-
ReplyRefPost::NotFound {
233
-
uri: post.parent_uri.as_ref().unwrap().clone(),
234
-
not_found: true,
235
-
},
236
-
),
237
-
grandparent_author: None,
238
-
})
250
+
if root_uri.is_some() || parent_uri.is_some() {
251
+
Some(ReplyRef {
252
+
root: root.map(postview_to_replyref).unwrap_or(
253
+
ReplyRefPost::NotFound {
254
+
uri: root_uri.unwrap().to_owned(),
255
+
not_found: true,
256
+
},
257
+
),
258
+
parent: parent.map(postview_to_replyref).unwrap_or(
259
+
ReplyRefPost::NotFound {
260
+
uri: parent_uri.unwrap().to_owned(),
261
+
not_found: true,
262
+
},
263
+
),
264
+
grandparent_author: None,
265
+
})
266
+
} else {
267
+
None
268
+
}
239
269
} else {
240
270
None
241
271
};
242
272
243
-
let embed = embeds.get(&post_uri).cloned();
244
-
let labels = post_labels.get(&post_uri).cloned().unwrap_or_default();
245
-
let stats = stats.get(&post_uri).cloned();
246
-
let viewer = viewer_data.get(&post_uri).cloned();
247
-
let post =
248
-
build_postview(post, author.to_owned(), labels, embed, None, viewer, stats);
273
+
let reason = match item {
274
+
RawFeedItem::Repost { uri, by, at, .. } => {
275
+
Some(FeedViewPostReason::Repost(FeedReasonRepost {
276
+
by: profiles_hydrated.get(&by).cloned()?,
277
+
uri: Some(uri),
278
+
cid: None,
279
+
indexed_at: at,
280
+
}))
281
+
}
282
+
RawFeedItem::Pin { .. } => Some(FeedViewPostReason::Pin),
283
+
_ => None,
284
+
};
285
+
286
+
let post = build_postview(post);
249
287
250
-
Some((
251
-
post_uri,
252
-
FeedViewPost {
253
-
post,
254
-
reply,
255
-
reason: None,
256
-
feed_context: None,
257
-
},
258
-
))
288
+
Some(FeedViewPost {
289
+
post,
290
+
reply,
291
+
reason,
292
+
feed_context: context,
293
+
})
259
294
})
260
295
.collect()
261
296
}
···
299
334
_ => ReplyRefPost::Post(post),
300
335
}
301
336
}
337
+
338
+
#[derive(Debug)]
339
+
pub enum RawFeedItem {
340
+
Pin {
341
+
uri: String,
342
+
context: Option<String>,
343
+
},
344
+
Post {
345
+
uri: String,
346
+
context: Option<String>,
347
+
},
348
+
Repost {
349
+
uri: String,
350
+
post: String,
351
+
by: String,
352
+
at: chrono::DateTime<chrono::Utc>,
353
+
context: Option<String>,
354
+
},
355
+
}
356
+
357
+
impl RawFeedItem {
358
+
fn post_uri(&self) -> &str {
359
+
match self {
360
+
RawFeedItem::Pin { uri, .. } => uri,
361
+
RawFeedItem::Post { uri, .. } => uri,
362
+
RawFeedItem::Repost { post, .. } => post,
363
+
}
364
+
}
365
+
366
+
fn repost_by(&self) -> Option<String> {
367
+
match self {
368
+
RawFeedItem::Repost { by, .. } => Some(by.clone()),
369
+
_ => None,
370
+
}
371
+
}
372
+
373
+
fn context(&self) -> Option<String> {
374
+
match self {
375
+
RawFeedItem::Pin { context, .. } => context.clone(),
376
+
RawFeedItem::Post { context, .. } => context.clone(),
377
+
RawFeedItem::Repost { context, .. } => context.clone(),
378
+
}
379
+
}
380
+
}
+2
-2
parakeet/src/sql/thread.sql
+2
-2
parakeet/src/sql/thread.sql
···
1
1
-- Walks down a thread: starts at the direct replies to $1 and follows
-- parent_uri links to deeper replies while the depth limit $2 allows.
-- Posts flagged as violating a threadgate are excluded at every level.
WITH RECURSIVE thread AS (
    SELECT at_uri, parent_uri, root_uri, 0 AS depth
    FROM posts
    WHERE parent_uri = $1
      AND NOT violates_threadgate
    UNION ALL
    SELECT p.at_uri, p.parent_uri, p.root_uri, thread.depth + 1
    FROM posts p
    JOIN thread ON p.parent_uri = thread.at_uri
    WHERE thread.depth <= $2
      AND NOT p.violates_threadgate
)
SELECT *
FROM thread
ORDER BY depth DESC;
+4
-2
parakeet/src/sql/thread_parent.sql
+4
-2
parakeet/src/sql/thread_parent.sql
···
1
1
-- Walks up a thread: starts at the parent of post $1 and follows parent_uri
-- links towards the root while the depth limit $2 allows.
-- Nothing is returned when $1 itself violates a threadgate, and the walk
-- does not continue through ancestors that violate one.
WITH RECURSIVE parents AS (
    SELECT at_uri, cid, parent_uri, root_uri, 0 AS depth
    FROM posts
    WHERE at_uri = (
        SELECT parent_uri
        FROM posts
        WHERE at_uri = $1
          AND NOT violates_threadgate
    )
    UNION ALL
    SELECT p.at_uri, p.cid, p.parent_uri, p.root_uri, parents.depth + 1
    FROM posts p
    JOIN parents ON p.at_uri = parents.parent_uri
    WHERE parents.depth <= $2
      AND NOT p.violates_threadgate
)
SELECT *
FROM parents
ORDER BY depth DESC;
+7
-8
parakeet/src/xrpc/app_bsky/feed/likes.rs
+7
-8
parakeet/src/xrpc/app_bsky/feed/likes.rs
···
1
+
use crate::hydration::posts::RawFeedItem;
1
2
use crate::hydration::StatefulHydrator;
2
3
use crate::xrpc::error::{Error, XrpcResult};
3
4
use crate::xrpc::extract::{AtpAcceptLabelers, AtpAuth};
···
57
58
.last()
58
59
.map(|(last, _)| last.timestamp_millis().to_string());
59
60
60
-
let at_uris = results
61
+
let raw_feed = results
61
62
.iter()
62
-
.map(|(_, uri)| uri.clone())
63
+
.map(|(_, uri)| RawFeedItem::Post {
64
+
uri: uri.clone(),
65
+
context: None,
66
+
})
63
67
.collect::<Vec<_>>();
64
68
65
-
let mut posts = hyd.hydrate_feed_posts(at_uris).await;
66
-
67
-
let feed: Vec<_> = results
68
-
.into_iter()
69
-
.filter_map(|(_, uri)| posts.remove(&uri))
70
-
.collect();
69
+
let feed = hyd.hydrate_feed_posts(raw_feed, false).await;
71
70
72
71
Ok(Json(FeedRes { cursor, feed }))
73
72
}
+83
-92
parakeet/src/xrpc/app_bsky/feed/posts.rs
+83
-92
parakeet/src/xrpc/app_bsky/feed/posts.rs
···
1
+
use crate::hydration::posts::RawFeedItem;
1
2
use crate::hydration::StatefulHydrator;
2
3
use crate::xrpc::app_bsky::graph::lists::ListWithCursorQuery;
3
4
use crate::xrpc::error::{Error, XrpcResult};
···
16
17
use diesel_async::{AsyncPgConnection, RunQueryDsl};
17
18
use lexica::app_bsky::actor::ProfileView;
18
19
use lexica::app_bsky::feed::{
19
-
BlockedAuthor, FeedReasonRepost, FeedSkeletonResponse, FeedViewPost, FeedViewPostReason,
20
-
PostView, SkeletonReason, ThreadViewPost, ThreadViewPostType, ThreadgateView,
20
+
BlockedAuthor, FeedSkeletonResponse, FeedViewPost, PostView, SkeletonReason, ThreadViewPost,
21
+
ThreadViewPostType, ThreadgateView,
21
22
};
22
-
use parakeet_db::schema;
23
+
use parakeet_db::{models, schema};
23
24
use reqwest::Url;
24
25
use serde::{Deserialize, Serialize};
25
26
use std::collections::HashMap;
···
113
114
114
115
let hyd = StatefulHydrator::new(&state.dataloaders, &state.cdn, &labelers, maybe_auth);
115
116
116
-
let at_uris = skeleton.feed.iter().map(|v| v.post.clone()).collect();
117
117
let repost_skeleton = skeleton
118
118
.feed
119
119
.iter()
···
122
122
_ => None,
123
123
})
124
124
.collect::<Vec<_>>();
125
+
let mut repost_data = get_skeleton_repost_data(&mut conn, repost_skeleton).await;
125
126
126
-
let mut posts = hyd.hydrate_feed_posts(at_uris).await;
127
-
let mut repost_data = get_skeleton_repost_data(&mut conn, &hyd, repost_skeleton).await;
128
-
129
-
let feed = skeleton
127
+
let raw_feed = skeleton
130
128
.feed
131
129
.into_iter()
132
-
.filter_map(|item| {
133
-
let mut post = posts.remove(&item.post)?;
134
-
let reason = match item.reason {
135
-
Some(SkeletonReason::Repost { repost }) => {
136
-
repost_data.remove(&repost).map(FeedViewPostReason::Repost)
137
-
}
138
-
Some(SkeletonReason::Pin {}) => Some(FeedViewPostReason::Pin),
139
-
_ => None,
140
-
};
141
-
142
-
post.reason = reason;
143
-
post.feed_context = item.feed_context;
144
-
145
-
Some(post)
130
+
.filter_map(|v| match v.reason {
131
+
Some(SkeletonReason::Repost { repost }) => {
132
+
repost_data
133
+
.remove_entry(&repost)
134
+
.map(|(uri, (by, at))| RawFeedItem::Repost {
135
+
uri,
136
+
post: v.post,
137
+
by,
138
+
at: at.and_utc(),
139
+
context: v.feed_context,
140
+
})
141
+
}
142
+
Some(SkeletonReason::Pin {}) => Some(RawFeedItem::Pin {
143
+
uri: v.post,
144
+
context: v.feed_context,
145
+
}),
146
+
None => Some(RawFeedItem::Post {
147
+
uri: v.post,
148
+
context: v.feed_context,
149
+
}),
146
150
})
147
151
.collect();
148
152
153
+
let feed = hyd.hydrate_feed_posts(raw_feed, false).await;
154
+
149
155
Ok(Json(FeedRes {
150
156
cursor: skeleton.cursor,
151
157
feed,
152
158
}))
153
159
}
154
160
155
-
#[derive(Debug, Deserialize)]
161
+
#[derive(Debug, Default, Eq, PartialEq, Deserialize)]
156
162
#[serde(rename_all = "snake_case")]
157
163
pub enum GetAuthorFeedFilter {
164
+
#[default]
158
165
PostsWithReplies,
159
166
PostsNoReplies,
160
167
PostsWithMedia,
161
168
PostsAndAuthorThreads,
162
169
PostsWithVideo,
163
-
}
164
-
165
-
impl Default for GetAuthorFeedFilter {
166
-
fn default() -> Self {
167
-
Self::PostsWithReplies
168
-
}
169
170
}
170
171
171
172
#[derive(Debug, Deserialize)]
···
209
210
210
211
let pin = match query.include_pins && query.cursor.is_none() {
211
212
false => None,
212
-
true => match crate::db::get_pinned_post_uri(&mut conn, &did).await? {
213
-
Some(post) => hyd.hydrate_post(post).await,
214
-
None => None,
215
-
},
213
+
true => crate::db::get_pinned_post_uri(&mut conn, &did).await?,
216
214
};
217
215
218
216
let limit = query.limit.unwrap_or(50).clamp(1, 100);
219
217
220
-
let mut posts_query = schema::posts::table
221
-
.select((schema::posts::created_at, schema::posts::at_uri))
222
-
.filter(schema::posts::did.eq(did))
218
+
let mut posts_query = schema::author_feeds::table
219
+
.select(models::AuthorFeedItem::as_select())
220
+
.left_join(schema::posts::table.on(schema::posts::at_uri.eq(schema::author_feeds::post)))
221
+
.filter(schema::author_feeds::did.eq(&did))
223
222
.into_boxed();
224
223
225
224
if let Some(cursor) = datetime_cursor(query.cursor.as_ref()) {
226
-
posts_query = posts_query.filter(schema::posts::created_at.lt(cursor));
225
+
posts_query = posts_query.filter(schema::author_feeds::sort_at.lt(cursor));
227
226
}
228
227
228
+
let author_threads_only = query.filter == GetAuthorFeedFilter::PostsAndAuthorThreads;
229
229
posts_query = match query.filter {
230
-
GetAuthorFeedFilter::PostsWithReplies => posts_query,
230
+
GetAuthorFeedFilter::PostsWithReplies => {
231
+
posts_query.filter(schema::author_feeds::typ.eq("post"))
232
+
}
231
233
GetAuthorFeedFilter::PostsNoReplies => {
232
234
posts_query.filter(schema::posts::parent_uri.is_null())
233
235
}
234
-
GetAuthorFeedFilter::PostsWithMedia => posts_query.filter(embed_type_filter(&[
235
-
"app.bsky.embed.video",
236
-
"app.bsky.embed.images",
237
-
])),
236
+
GetAuthorFeedFilter::PostsWithMedia => posts_query.filter(
237
+
embed_type_filter(&["app.bsky.embed.video", "app.bsky.embed.images"])
238
+
.and(schema::author_feeds::typ.eq("post")),
239
+
),
238
240
GetAuthorFeedFilter::PostsAndAuthorThreads => posts_query.filter(
239
241
(schema::posts::parent_uri
240
-
.like(format!("at://{}/%", &query.actor))
242
+
.like(format!("at://{did}/%"))
241
243
.or(schema::posts::parent_uri.is_null()))
242
244
.and(
243
245
schema::posts::root_uri
244
-
.like(format!("at://{}/%", &query.actor))
246
+
.like(format!("at://{did}/%"))
245
247
.or(schema::posts::root_uri.is_null()),
246
248
),
247
249
),
248
-
GetAuthorFeedFilter::PostsWithVideo => {
249
-
posts_query.filter(embed_type_filter(&["app.bsky.embed.video"]))
250
-
}
250
+
GetAuthorFeedFilter::PostsWithVideo => posts_query.filter(
251
+
embed_type_filter(&["app.bsky.embed.video"]).and(schema::author_feeds::typ.eq("post")),
252
+
),
251
253
};
252
254
253
255
let results = posts_query
254
-
.order(schema::posts::created_at.desc())
256
+
.order(schema::author_feeds::sort_at.desc())
255
257
.limit(limit as i64)
256
-
.load::<(chrono::DateTime<chrono::Utc>, String)>(&mut conn)
258
+
.load(&mut conn)
257
259
.await?;
258
260
259
261
let cursor = results
260
262
.last()
261
-
.map(|(last, _)| last.timestamp_millis().to_string());
263
+
.map(|item| item.sort_at.timestamp_millis().to_string());
262
264
263
-
let at_uris = results
264
-
.iter()
265
-
.map(|(_, uri)| uri.clone())
266
-
.collect::<Vec<_>>();
267
-
268
-
let mut posts = hyd.hydrate_feed_posts(at_uris).await;
269
-
270
-
let mut feed: Vec<_> = results
265
+
let mut raw_feed = results
271
266
.into_iter()
272
-
.filter_map(|(_, uri)| posts.remove(&uri))
273
-
.collect();
267
+
.filter_map(|item| match &*item.typ {
268
+
"post" => Some(RawFeedItem::Post {
269
+
uri: item.post,
270
+
context: None,
271
+
}),
272
+
"repost" => Some(RawFeedItem::Repost {
273
+
uri: item.uri,
274
+
post: item.post,
275
+
by: item.did,
276
+
at: item.sort_at,
277
+
context: None,
278
+
}),
279
+
_ => None,
280
+
})
281
+
.collect::<Vec<_>>();
274
282
275
283
if let Some(post) = pin {
276
-
feed.insert(
284
+
raw_feed.insert(
277
285
0,
278
-
FeedViewPost {
279
-
post,
280
-
reply: None,
281
-
reason: Some(FeedViewPostReason::Pin),
282
-
feed_context: None,
286
+
RawFeedItem::Pin {
287
+
uri: post,
288
+
context: None,
283
289
},
284
290
);
285
291
}
292
+
293
+
let feed = hyd.hydrate_feed_posts(raw_feed, author_threads_only).await;
286
294
287
295
Ok(Json(FeedRes { cursor, feed }))
288
296
}
···
325
333
.last()
326
334
.map(|(last, _)| last.timestamp_millis().to_string());
327
335
328
-
let at_uris = results
336
+
let raw_feed = results
329
337
.iter()
330
-
.map(|(_, uri)| uri.clone())
338
+
.map(|(_, uri)| RawFeedItem::Post {
339
+
uri: uri.clone(),
340
+
context: None,
341
+
})
331
342
.collect::<Vec<_>>();
332
343
333
-
let mut posts = hyd.hydrate_feed_posts(at_uris).await;
334
-
335
-
let feed = results
336
-
.into_iter()
337
-
.filter_map(|(_, uri)| posts.remove(&uri))
338
-
.collect();
344
+
let feed = hyd.hydrate_feed_posts(raw_feed, false).await;
339
345
340
346
Ok(Json(FeedRes { cursor, feed }))
341
347
}
···
669
675
}
670
676
}
671
677
672
-
async fn get_skeleton_repost_data<'a>(
678
+
async fn get_skeleton_repost_data(
673
679
conn: &mut AsyncPgConnection,
674
-
hyd: &StatefulHydrator<'a>,
675
680
reposts: Vec<String>,
676
-
) -> HashMap<String, FeedReasonRepost> {
681
+
) -> HashMap<String, (String, NaiveDateTime)> {
677
682
let Ok(repost_data) = schema::records::table
678
683
.select((
679
684
schema::records::at_uri,
···
687
692
return HashMap::new();
688
693
};
689
694
690
-
let profiles = repost_data.iter().map(|(_, did, _)| did.clone()).collect();
691
-
let profiles = hyd.hydrate_profiles_basic(profiles).await;
692
-
693
695
repost_data
694
696
.into_iter()
695
-
.filter_map(|(uri, did, indexed_at)| {
696
-
let by = profiles.get(&did).cloned()?;
697
-
698
-
let repost = FeedReasonRepost {
699
-
by,
700
-
uri: Some(uri.clone()),
701
-
cid: None, // okay, we do have this, but the app doesn't seem to be bothered about not setting it.
702
-
indexed_at: indexed_at.and_utc(),
703
-
};
704
-
705
-
Some((uri, repost))
706
-
})
697
+
.map(|(uri, did, at)| (uri, (did, at)))
707
698
.collect()
708
699
}
709
700