Parakeet is a Rust-based Bluesky AppView that aims to implement most of the functionality required to support the Bluesky client.

Merge branch 'post-tweaks' into 'main'

Post Tweaks

See merge request parakeet-social/parakeet!23

Changed files
+757 -224
consumer
migrations
2025-09-27-171241_post-tweaks
parakeet
src
hydration
sql
xrpc
app_bsky
parakeet-db
+6 -1
consumer/src/backfill/mod.rs
··· 275 275 follows: Vec<(String, String, DateTime<Utc>)>, 276 276 list_items: Vec<(String, records::AppBskyGraphListItem)>, 277 277 verifications: Vec<(String, Cid, records::AppBskyGraphVerification)>, 278 + threadgates: Vec<(String, Cid, records::AppBskyFeedThreadgate)>, // not COPY'd but needs to be kept until last. 278 279 records: Vec<(String, Cid)>, 279 280 } 280 281 281 282 impl CopyStore { 282 283 async fn submit(self, t: &mut Transaction<'_>, did: &str) -> Result<(), tokio_postgres::Error> { 283 284 db::copy::copy_likes(t, did, self.likes).await?; 284 - db::copy::copy_posts(t, did, self.posts).await?; 285 285 db::copy::copy_reposts(t, did, self.reposts).await?; 286 286 db::copy::copy_blocks(t, did, self.blocks).await?; 287 287 db::copy::copy_follows(t, did, self.follows).await?; 288 288 db::copy::copy_list_items(t, self.list_items).await?; 289 289 db::copy::copy_verification(t, did, self.verifications).await?; 290 + db::copy::copy_posts(t, did, self.posts).await?; 291 + for (at_uri, cid, record) in self.threadgates { 292 + db::threadgate_enforce_backfill(t, did, &record).await?; 293 + db::threadgate_upsert(t, &at_uri, cid, record).await?; 294 + } 290 295 db::copy::copy_records(t, did, self.records).await?; 291 296 292 297 Ok(())
+11 -1
consumer/src/backfill/repo.rs
··· 4 4 }; 5 5 use crate::indexer::records; 6 6 use crate::indexer::types::{AggregateDeltaStore, RecordTypes}; 7 + use crate::utils::at_uri_is_by; 7 8 use crate::{db, indexer}; 8 9 use deadpool_postgres::Transaction; 9 10 use ipld_core::cid::Cid; ··· 144 145 db::maintain_self_labels(t, did, Some(cid), &at_uri, labels).await?; 145 146 } 146 147 if let Some(embed) = rec.embed.clone().and_then(|embed| embed.into_bsky()) { 147 - db::post_embed_insert(t, &at_uri, embed, rec.created_at).await?; 148 + db::post_embed_insert(t, &at_uri, embed, rec.created_at, true).await?; 148 149 } 149 150 150 151 deltas.incr(did, AggregateType::ProfilePost).await; ··· 165 166 copies 166 167 .reposts 167 168 .push((rkey.to_string(), rec.subject, rec.via, rec.created_at)); 169 + } 170 + RecordTypes::AppBskyFeedThreadgate(record) => { 171 + if !at_uri_is_by(&record.post, did) { 172 + tracing::warn!("tried to create a threadgate on a post we don't control!"); 173 + return Ok(()); 174 + } 175 + 176 + copies.push_record(&at_uri, cid); 177 + copies.threadgates.push((at_uri, cid, record)); 168 178 } 169 179 RecordTypes::AppBskyGraphBlock(rec) => { 170 180 copies.push_record(&at_uri, cid);
+38 -3
consumer/src/db/copy.rs
··· 1 1 use super::PgExecResult; 2 2 use crate::indexer::records; 3 - use crate::utils::strongref_to_parts; 3 + use crate::utils::{extract_mentions_and_tags, merge_tags, strongref_to_parts}; 4 4 use chrono::prelude::*; 5 5 use deadpool_postgres::Transaction; 6 6 use futures::pin_mut; ··· 119 119 .await 120 120 } 121 121 122 - const POST_STMT: &str = "COPY posts_tmp (at_uri, cid, did, record, content, facets, languages, tags, parent_uri, parent_cid, root_uri, root_cid, embed, embed_subtype, created_at) FROM STDIN (FORMAT binary)"; 122 + const POST_STMT: &str = "COPY posts_tmp (at_uri, cid, did, record, content, facets, languages, tags, parent_uri, parent_cid, root_uri, root_cid, embed, embed_subtype, mentions, created_at) FROM STDIN (FORMAT binary)"; 123 123 const POST_TYPES: &[Type] = &[ 124 124 Type::TEXT, 125 125 Type::TEXT, ··· 135 135 Type::TEXT, 136 136 Type::TEXT, 137 137 Type::TEXT, 138 + Type::TEXT_ARRAY, 138 139 Type::TIMESTAMP, 139 140 ]; 140 141 pub async fn copy_posts( ··· 159 160 160 161 for (at_uri, cid, post) in data { 161 162 let record = serde_json::to_value(&post).unwrap(); 163 + let (mentions, tags) = post 164 + .facets 165 + .as_ref() 166 + .map(|v| extract_mentions_and_tags(v)) 167 + .unzip(); 162 168 let facets = post.facets.and_then(|v| serde_json::to_value(v).ok()); 163 169 let embed = post.embed.as_ref().map(|v| v.as_str()); 164 170 let embed_subtype = post.embed.as_ref().and_then(|v| v.subtype()); 165 171 let (parent_uri, parent_cid) = strongref_to_parts(post.reply.as_ref().map(|v| &v.parent)); 166 172 let (root_uri, root_cid) = strongref_to_parts(post.reply.as_ref().map(|v| &v.root)); 173 + 174 + let tags = merge_tags(tags, post.tags); 167 175 168 176 let writer = writer.as_mut(); 169 177 writer ··· 175 183 &post.text, 176 184 &facets, 177 185 &post.langs.unwrap_or_default(), 178 - &post.tags.unwrap_or_default(), 186 + &tags, 179 187 &parent_uri, 180 188 &parent_cid, 181 189 &root_uri, 182 190 &root_cid, 183 191 &embed, 184 192 
&embed_subtype, 193 + &mentions, 185 194 &post.created_at.naive_utc(), 186 195 ]) 187 196 .await?; 188 197 } 189 198 190 199 writer.finish().await?; 200 + 201 + let threadgated: Vec<(String, String, DateTime<Utc>)> = conn 202 + .query( 203 + "SELECT root_uri, p.at_uri, p.created_at FROM posts_tmp p INNER JOIN threadgates t ON root_uri = post_uri WHERE t.allow IS NOT NULL", 204 + &[], 205 + ) 206 + .await? 207 + .into_iter() 208 + .map(|v| (v.get(0), v.get(1), v.get(2))).collect(); 209 + 210 + for (root, post, created_at) in threadgated { 211 + match super::post_enforce_threadgate(conn, &root, did, created_at, true).await { 212 + Ok(true) => { 213 + conn.execute( 214 + "UPDATE posts_tmp SET violates_threadgate=TRUE WHERE at_uri=$1", 215 + &[&post], 216 + ) 217 + .await?; 218 + } 219 + Ok(false) => continue, 220 + Err(e) => { 221 + tracing::error!("failed to check threadgate enforcement: {e}"); 222 + continue; 223 + } 224 + } 225 + } 191 226 192 227 conn.execute("INSERT INTO posts (SELECT * FROM posts_tmp)", &[]) 193 228 .await
+208
consumer/src/db/gates.rs
··· 1 + use super::{PgExecResult, PgResult}; 2 + use crate::indexer::records::{ 3 + AppBskyFeedThreadgate, ThreadgateRule, THREADGATE_RULE_FOLLOWER, THREADGATE_RULE_FOLLOWING, 4 + THREADGATE_RULE_LIST, THREADGATE_RULE_MENTION, 5 + }; 6 + use chrono::prelude::*; 7 + use chrono::{DateTime, Utc}; 8 + use deadpool_postgres::GenericClient; 9 + use std::collections::HashSet; 10 + 11 + pub async fn post_enforce_threadgate<C: GenericClient>( 12 + conn: &mut C, 13 + root: &str, 14 + post_author: &str, 15 + post_created_at: DateTime<Utc>, 16 + is_backfill: bool, 17 + ) -> PgResult<bool> { 18 + // check if the root and the current post are the same author 19 + // strip "at://" then break into parts by '/' 20 + let parts = root[5..].split('/').collect::<Vec<_>>(); 21 + let root_author = parts[0]; 22 + if root_author == post_author { 23 + return Ok(false); 24 + } 25 + 26 + let tg_data = super::threadgate_get(conn, root).await?; 27 + 28 + let Some((created_at, allow, allow_lists)) = tg_data else { 29 + return Ok(false); 30 + }; 31 + 32 + // when backfilling, there's no point continuing if the record is dated before the threadgate 33 + if is_backfill && post_created_at < created_at { 34 + return Ok(false); 35 + } 36 + 37 + if allow.is_empty() { 38 + return Ok(true); 39 + } 40 + 41 + let allow: HashSet<String> = HashSet::from_iter(allow); 42 + 43 + if allow.contains(THREADGATE_RULE_FOLLOWER) || allow.contains(THREADGATE_RULE_FOLLOWING) { 44 + let profile_state: Option<(bool, bool)> = conn 45 + .query_opt( 46 + "SELECT following IS NOT NULL, followed IS NOT NULL FROM profile_states WHERE did=$1 AND subject=$2", 47 + &[&root_author, &post_author], 48 + ) 49 + .await? 
50 + .map(|v| (v.get(0), v.get(1))); 51 + 52 + if let Some((following, followed)) = profile_state { 53 + if allow.contains(THREADGATE_RULE_FOLLOWER) && followed { 54 + return Ok(false); 55 + } 56 + 57 + if allow.contains(THREADGATE_RULE_FOLLOWING) && following { 58 + return Ok(false); 59 + } 60 + } 61 + } 62 + 63 + // check mentions 64 + if allow.contains(THREADGATE_RULE_MENTION) { 65 + let mentions: Vec<String> = conn 66 + .query_opt("SELECT mentions FROM posts WHERE at_uri=$1", &[&root]) 67 + .await? 68 + .map(|r| r.get(0)) 69 + .unwrap_or_default(); 70 + 71 + if mentions.contains(&post_author.to_owned()) { 72 + return Ok(false); 73 + } 74 + } 75 + 76 + if allow.contains(THREADGATE_RULE_LIST) { 77 + if allow_lists.is_empty() { 78 + return Ok(true); 79 + } 80 + 81 + let count: i64 = conn 82 + .query_one( 83 + "SELECT count(*) FROM list_items WHERE list_uri=ANY($1) AND subject=$2", 84 + &[&allow_lists, &post_author], 85 + ) 86 + .await? 87 + .get(0); 88 + if count != 0 { 89 + return Ok(false); 90 + } 91 + } 92 + 93 + Ok(true) 94 + } 95 + 96 + pub async fn postgate_maintain_detaches<C: GenericClient>( 97 + conn: &mut C, 98 + post: &str, 99 + detached: &[String], 100 + disable_effective: Option<NaiveDateTime>, 101 + ) -> PgExecResult { 102 + conn.execute( 103 + "SELECT maintain_postgates($1, $2, $3)", 104 + &[&post, &detached, &disable_effective], 105 + ) 106 + .await 107 + } 108 + 109 + // variant of post_enforce_threadgate that runs when backfilling to clean up any posts already in DB 110 + pub async fn threadgate_enforce_backfill<C: GenericClient>( 111 + conn: &mut C, 112 + root_author: &str, 113 + threadgate: &AppBskyFeedThreadgate, 114 + ) -> PgExecResult { 115 + // pull out allow - if it's None we can skip this gate. 
116 + let Some(allow) = threadgate.allow.as_ref() else { 117 + return Ok(0); 118 + }; 119 + 120 + let root = &threadgate.post; 121 + 122 + if allow.is_empty() { 123 + // blind update everything 124 + return conn.execute( 125 + "UPDATE posts SET violates_threadgate=TRUE WHERE root_uri=$1 AND did != $2 AND created_at >= $3", 126 + &[&root, &root_author, &threadgate.created_at], 127 + ).await; 128 + } 129 + 130 + // pull authors with our root_uri where the author is not the root author and are dated after created_at 131 + // this is mutable because we'll remove ALLOWED dids 132 + let mut dids: HashSet<String> = conn 133 + .query( 134 + "SELECT DISTINCT did FROM posts WHERE root_uri=$1 AND did != $2 AND created_at >= $3", 135 + &[&root, &root_author, &threadgate.created_at], 136 + ) 137 + .await? 138 + .into_iter() 139 + .map(|row| row.get(0)) 140 + .collect(); 141 + 142 + // this will be empty if there are no replies. 143 + if dids.is_empty() { 144 + return Ok(0); 145 + } 146 + 147 + let allowed_lists = allow 148 + .iter() 149 + .filter_map(|rule| match rule { 150 + ThreadgateRule::List { list } => Some(list), 151 + _ => None, 152 + }) 153 + .collect::<Vec<_>>(); 154 + 155 + let allow: HashSet<_> = HashSet::from_iter(allow.into_iter().map(|v| v.as_str())); 156 + 157 + if allow.contains(THREADGATE_RULE_FOLLOWER) && !dids.is_empty() { 158 + let current_dids: Vec<_> = dids.iter().collect(); 159 + 160 + let res = conn.query( 161 + "SELECT subject FROM profile_states WHERE did=$1 AND subject=ANY($2) AND followed IS NOT NULL", 162 + &[&root_author, &current_dids] 163 + ).await?; 164 + 165 + dids = &dids - &HashSet::from_iter(res.into_iter().map(|r| r.get(0))); 166 + } 167 + 168 + if allow.contains(THREADGATE_RULE_FOLLOWING) && !dids.is_empty() { 169 + let current_dids: Vec<_> = dids.iter().collect(); 170 + 171 + let res = conn.query( 172 + "SELECT subject FROM profile_states WHERE did=$1 AND subject=ANY($2) AND following IS NOT NULL", 173 + &[&root_author, &current_dids] 
174 + ).await?; 175 + 176 + dids = &dids - &HashSet::from_iter(res.into_iter().map(|r| r.get(0))); 177 + } 178 + 179 + if allow.contains(THREADGATE_RULE_MENTION) && !dids.is_empty() { 180 + let mentions: Vec<String> = conn 181 + .query_opt("SELECT mentions FROM posts WHERE at_uri=$1", &[&root]) 182 + .await? 183 + .map(|r| r.get(0)) 184 + .unwrap_or_default(); 185 + 186 + dids = &dids - &HashSet::from_iter(mentions); 187 + } 188 + 189 + if allow.contains(THREADGATE_RULE_LIST) && !dids.is_empty() { 190 + let current_dids: Vec<_> = dids.iter().collect(); 191 + 192 + let res = conn 193 + .query( 194 + "SELECT subject FROM list_items WHERE list_uri = ANY($1) AND subject = ANY($2)", 195 + &[&allowed_lists, &current_dids], 196 + ) 197 + .await?; 198 + 199 + dids = &dids - &HashSet::from_iter(res.into_iter().map(|r| r.get(0))); 200 + } 201 + 202 + let dids = dids.into_iter().collect::<Vec<_>>(); 203 + 204 + conn.execute( 205 + "UPDATE posts SET violates_threadgate=TRUE WHERE root_uri = $1 AND did = ANY($2) AND created_at >= $3", 206 + &[&threadgate.post, &dids, &threadgate.created_at] 207 + ).await 208 + }
+2
consumer/src/db/mod.rs
··· 7 7 mod actor; 8 8 mod backfill; 9 9 pub mod copy; 10 + mod gates; 10 11 mod labels; 11 12 mod record; 12 13 13 14 pub use actor::*; 14 15 pub use backfill::*; 16 + pub use gates::*; 15 17 pub use labels::*; 16 18 pub use record::*;
+70 -28
consumer/src/db/record.rs
··· 1 1 use super::{PgExecResult, PgOptResult, PgResult}; 2 2 use crate::indexer::records::*; 3 - use crate::utils::{blob_ref, strongref_to_parts}; 3 + use crate::utils::{blob_ref, extract_mentions_and_tags, merge_tags, strongref_to_parts}; 4 4 use chrono::prelude::*; 5 5 use deadpool_postgres::GenericClient; 6 6 use ipld_core::cid::Cid; 7 7 use lexica::community_lexicon::bookmarks::Bookmark; 8 + use std::collections::HashSet; 8 9 9 10 pub async fn record_upsert<C: GenericClient>( 10 11 conn: &mut C, ··· 317 318 repo: &str, 318 319 cid: Cid, 319 320 rec: AppBskyFeedPost, 321 + is_backfill: bool, 320 322 ) -> PgExecResult { 321 323 let cid = cid.to_string(); 322 324 let record = serde_json::to_value(&rec).unwrap(); 325 + let (mentions, tags) = rec 326 + .facets 327 + .as_ref() 328 + .map(|v| extract_mentions_and_tags(v)) 329 + .unzip(); 323 330 let facets = rec.facets.and_then(|v| serde_json::to_value(v).ok()); 324 331 let (parent_uri, parent_cid) = strongref_to_parts(rec.reply.as_ref().map(|v| &v.parent)); 325 332 let (root_uri, root_cid) = strongref_to_parts(rec.reply.as_ref().map(|v| &v.root)); 326 333 let embed = rec.embed.as_ref().map(|v| v.as_str()); 327 334 let embed_subtype = rec.embed.as_ref().and_then(|v| v.subtype()); 328 335 336 + // if there is a root, we need to check for the presence of a threadgate. 337 + let violates_threadgate = match &root_uri { 338 + Some(root) => { 339 + super::post_enforce_threadgate(conn, root, repo, rec.created_at, is_backfill).await? 
340 + } 341 + None => false, 342 + }; 343 + 344 + let tags = merge_tags(tags, rec.tags); 345 + 329 346 let count = conn 330 347 .execute( 331 348 include_str!("sql/post_insert.sql"), ··· 337 354 &rec.text, 338 355 &facets, 339 356 &rec.langs.unwrap_or_default(), 340 - &rec.tags.unwrap_or_default(), 357 + &tags, 341 358 &parent_uri, 342 359 &parent_cid, 343 360 &root_uri, 344 361 &root_cid, 345 362 &embed, 346 363 &embed_subtype, 364 + &mentions, 365 + &violates_threadgate, 347 366 &rec.created_at, 348 367 ], 349 368 ) 350 369 .await?; 351 370 352 371 if let Some(embed) = rec.embed.and_then(|embed| embed.into_bsky()) { 353 - post_embed_insert(conn, at_uri, embed, rec.created_at).await?; 372 + post_embed_insert(conn, at_uri, embed, rec.created_at, is_backfill).await?; 354 373 } 355 374 356 375 Ok(count) ··· 380 399 post: &str, 381 400 embed: AppBskyEmbed, 382 401 created_at: DateTime<Utc>, 402 + is_backfill: bool, 383 403 ) -> PgExecResult { 384 404 match embed { 385 405 AppBskyEmbed::Images(embed) => post_embed_image_insert(conn, post, embed).await, 386 406 AppBskyEmbed::Video(embed) => post_embed_video_insert(conn, post, embed).await, 387 407 AppBskyEmbed::External(embed) => post_embed_external_insert(conn, post, embed).await, 388 408 AppBskyEmbed::Record(embed) => { 389 - post_embed_record_insert(conn, post, embed, created_at).await 409 + post_embed_record_insert(conn, post, embed, created_at, is_backfill).await 390 410 } 391 411 AppBskyEmbed::RecordWithMedia(embed) => { 392 - post_embed_record_insert(conn, post, embed.record, created_at).await?; 412 + post_embed_record_insert(conn, post, embed.record, created_at, is_backfill).await?; 393 413 match *embed.media { 394 414 AppBskyEmbed::Images(embed) => post_embed_image_insert(conn, post, embed).await, 395 415 AppBskyEmbed::Video(embed) => post_embed_video_insert(conn, post, embed).await, ··· 476 496 ).await 477 497 } 478 498 499 + const PG_DISABLE_RULE: &str = "app.bsky.feed.postgate#disableRule"; 479 500 async fn 
post_embed_record_insert<C: GenericClient>( 480 501 conn: &mut C, 481 502 post: &str, 482 503 embed: AppBskyEmbedRecord, 483 504 post_created_at: DateTime<Utc>, 505 + is_backfill: bool, 484 506 ) -> PgExecResult { 485 507 // strip "at://" then break into parts by '/' 486 508 let parts = embed.record.uri[5..].split('/').collect::<Vec<_>>(); 487 509 488 510 let detached = if parts[1] == "app.bsky.feed.post" { 489 - let postgate_effective: Option<DateTime<Utc>> = conn 490 - .query_opt( 491 - "SELECT created_at FROM postgates WHERE post_uri=$1", 492 - &[&post], 493 - ) 494 - .await? 495 - .map(|v| v.get(0)); 511 + let pg_data = postgate_get(conn, post).await?; 496 512 497 - postgate_effective 498 - .map(|v| Utc::now().min(post_created_at) > v) 499 - .unwrap_or_default() 513 + if let Some((effective, detached, rules)) = pg_data { 514 + let detached: HashSet<String> = HashSet::from_iter(detached); 515 + let rules: HashSet<String> = HashSet::from_iter(rules); 516 + let compare_date = match is_backfill { 517 + true => post_created_at, 518 + false => Utc::now(), 519 + }; 520 + 521 + detached.contains(post) || (rules.contains(PG_DISABLE_RULE) && compare_date > effective) 522 + } else { 523 + false 524 + } 500 525 } else { 501 526 false 502 527 }; ··· 505 530 "INSERT INTO post_embed_record (post_uri, record_type, uri, cid, detached) VALUES ($1, $2, $3, $4, $5)", 506 531 &[&post, &parts[1], &embed.record.uri, &embed.record.cid.to_string(), &detached], 507 532 ).await 533 + } 534 + 535 + async fn postgate_get<C: GenericClient>( 536 + conn: &mut C, 537 + post: &str, 538 + ) -> PgOptResult<(DateTime<Utc>, Vec<String>, Vec<String>)> { 539 + let res = conn 540 + .query_opt( 541 + "SELECT created_at, detached, rules FROM postgates WHERE post_uri=$1", 542 + &[&post], 543 + ) 544 + .await? 
545 + .map(|v| (v.get(0), v.get(1), v.get(2))); 546 + 547 + Ok(res) 508 548 } 509 549 510 550 pub async fn postgate_upsert<C: GenericClient>( ··· 536 576 pub async fn postgate_delete<C: GenericClient>(conn: &mut C, at_uri: &str) -> PgExecResult { 537 577 conn.execute("DELETE FROM postgates WHERE at_uri=$1", &[&at_uri]) 538 578 .await 539 - } 540 - 541 - pub async fn postgate_maintain_detaches<C: GenericClient>( 542 - conn: &mut C, 543 - post: &str, 544 - detached: &[String], 545 - disable_effective: Option<NaiveDateTime>, 546 - ) -> PgExecResult { 547 - conn.execute( 548 - "SELECT maintain_postgates($1, $2, $3)", 549 - &[&post, &detached, &disable_effective], 550 - ) 551 - .await 552 579 } 553 580 554 581 pub async fn profile_upsert<C: GenericClient>( ··· 698 725 pub async fn status_delete<C: GenericClient>(conn: &mut C, did: &str) -> PgExecResult { 699 726 conn.execute("DELETE FROM statuses WHERE did=$1", &[&did]) 700 727 .await 728 + } 729 + 730 + pub async fn threadgate_get<C: GenericClient>( 731 + conn: &mut C, 732 + post: &str, 733 + ) -> PgOptResult<(DateTime<Utc>, Vec<String>, Vec<String>)> { 734 + let res = conn 735 + .query_opt( 736 + "SELECT created_at, allow, allowed_lists FROM threadgates WHERE post_uri=$1 AND allow IS NOT NULL", 737 + &[&post], 738 + ) 739 + .await? 740 + .map(|v| (v.get(0), v.get(1), v.get(2))); 741 + 742 + Ok(res) 701 743 } 702 744 703 745 pub async fn threadgate_upsert<C: GenericClient>(
+2 -2
consumer/src/db/sql/post_insert.sql
··· 1 1 INSERT INTO posts (at_uri, did, cid, record, content, facets, languages, tags, parent_uri, parent_cid, root_uri, 2 - root_cid, embed, embed_subtype, created_at) 3 - VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15) 2 + root_cid, embed, embed_subtype, mentions, violates_threadgate, created_at) 3 + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17) 4 4 ON CONFLICT DO NOTHING
+1 -1
consumer/src/indexer/mod.rs
··· 625 625 }); 626 626 627 627 let labels = record.labels.clone(); 628 - db::post_insert(conn, at_uri, repo, cid, record).await?; 628 + db::post_insert(conn, at_uri, repo, cid, record, false).await?; 629 629 if let Some(labels) = labels { 630 630 db::maintain_self_labels(conn, repo, Some(cid), at_uri, labels).await?; 631 631 }
+9 -4
consumer/src/indexer/records.rs
··· 272 272 pub hidden_replies: Vec<String>, 273 273 } 274 274 275 + pub const THREADGATE_RULE_MENTION: &str = "app.bsky.feed.threadgate#mentionRule"; 276 + pub const THREADGATE_RULE_FOLLOWER: &str = "app.bsky.feed.threadgate#followerRule"; 277 + pub const THREADGATE_RULE_FOLLOWING: &str = "app.bsky.feed.threadgate#followingRule"; 278 + pub const THREADGATE_RULE_LIST: &str = "app.bsky.feed.threadgate#listRule"; 279 + 275 280 #[derive(Debug, Deserialize, Serialize)] 276 281 #[serde(tag = "$type")] 277 282 pub enum ThreadgateRule { ··· 288 293 impl ThreadgateRule { 289 294 pub fn as_str(&self) -> &'static str { 290 295 match self { 291 - ThreadgateRule::Mention => "app.bsky.feed.threadgate#mentionRule", 292 - ThreadgateRule::Follower => "app.bsky.feed.threadgate#followerRule", 293 - ThreadgateRule::Following => "app.bsky.feed.threadgate#followingRule", 294 - ThreadgateRule::List { .. } => "app.bsky.feed.threadgate#listRule", 296 + ThreadgateRule::Mention => THREADGATE_RULE_MENTION, 297 + ThreadgateRule::Follower => THREADGATE_RULE_FOLLOWER, 298 + ThreadgateRule::Following => THREADGATE_RULE_FOLLOWING, 299 + ThreadgateRule::List { .. } => THREADGATE_RULE_LIST, 295 300 } 296 301 } 297 302 }
+31
consumer/src/utils.rs
··· 1 + use lexica::app_bsky::richtext::{Facet, FacetMain, FacetOuter}; 1 2 use lexica::{Blob, StrongRef}; 2 3 use serde::{Deserialize, Deserializer}; 3 4 ··· 39 40 40 41 did == split_aturi[2] 41 42 } 43 + 44 + pub fn extract_mentions_and_tags(from: &[FacetMain]) -> (Vec<String>, Vec<String>) { 45 + let (mentions, tags) = from 46 + .iter() 47 + .flat_map(|v| { 48 + v.features.iter().map(|facet| match facet { 49 + FacetOuter::Bsky(Facet::Mention { did }) => (Some(did), None), 50 + FacetOuter::Bsky(Facet::Tag { tag }) => (None, Some(tag)), 51 + _ => (None, None), 52 + }) 53 + }) 54 + .unzip::<_, _, Vec<_>, Vec<_>>(); 55 + 56 + let mentions = mentions.into_iter().flatten().cloned().collect(); 57 + let tags = tags.into_iter().flatten().cloned().collect(); 58 + 59 + (mentions, tags) 60 + } 61 + 62 + pub fn merge_tags<T>(t1: Option<Vec<T>>, t2: Option<Vec<T>>) -> Vec<T> { 63 + match (t1, t2) { 64 + (Some(t1), None) => t1, 65 + (None, Some(t2)) => t2, 66 + (Some(mut t1), Some(t2)) => { 67 + t1.extend(t2); 68 + t1 69 + } 70 + _ => Vec::default(), 71 + } 72 + }
+15
migrations/2025-09-27-171241_post-tweaks/down.sql
-- Reverts the post-tweaks migration: removes the author_feeds machinery first, then the
-- two columns that were added to posts.

-- triggers go before the functions they execute
drop trigger t_author_feed_ins_post on posts;
drop trigger t_author_feed_del_post on posts;
drop trigger t_author_feed_ins_repost on reposts;
drop trigger t_author_feed_del_repost on reposts;

drop function f_author_feed_ins_post;
drop function f_author_feed_del_post;
drop function f_author_feed_ins_repost;
drop function f_author_feed_del_repost;

drop table author_feeds;

-- columns added to posts by up.sql
alter table posts
    drop column mentions,
    drop column violates_threadgate;
+79
migrations/2025-09-27-171241_post-tweaks/up.sql
-- mentions: DIDs mentioned via facets (NULL when the post has no facets).
-- violates_threadgate: set by the consumer when a reply is not allowed by the root's threadgate.
alter table posts
    add column mentions            text[],
    add column violates_threadgate bool not null default false;

-- One row per item in an author's feed: their own posts and their reposts.
--   uri  - AT-URI of the feed item itself (the post, or the repost record)
--   post - AT-URI of the post being shown (equals uri for typ = 'post')
create table author_feeds
(
    uri     text primary key,
    cid     text        not null,
    post    text        not null,
    did     text        not null,
    typ     text        not null,
    sort_at timestamptz not null
);

-- author_feeds post triggers
create function f_author_feed_ins_post() returns trigger
    language plpgsql as
$$
begin
    insert into author_feeds (uri, cid, post, did, typ, sort_at)
    VALUES (NEW.at_uri, NEW.cid, NEW.at_uri, NEW.did, 'post', NEW.created_at)
    on conflict do nothing;
    return NEW;
end;
$$;

create trigger t_author_feed_ins_post
    before insert
    on posts
    for each row
execute procedure f_author_feed_ins_post();

create function f_author_feed_del_post() returns trigger
    language plpgsql as
$$
begin
    -- author_feeds has no "item" column; the post's AT-URI is stored in "post"
    delete from author_feeds where did = OLD.did and post = OLD.at_uri and typ = 'post';
    return OLD;
end;
$$;

create trigger t_author_feed_del_post
    before delete
    on posts
    for each row
execute procedure f_author_feed_del_post();

-- author_feeds repost triggers
create function f_author_feed_ins_repost() returns trigger
    language plpgsql as
$$
begin
    -- AT-URIs are at://<did>/<collection>/<rkey>; the '/' separators are required
    insert into author_feeds (uri, cid, post, did, typ, sort_at)
    VALUES ('at://' || NEW.did || '/app.bsky.feed.repost/' || NEW.rkey, NEW.post_cid, NEW.post, NEW.did, 'repost', NEW.created_at)
    on conflict do nothing;
    return NEW;
end;
$$;

create trigger t_author_feed_ins_repost
    before insert
    on reposts
    for each row
execute procedure f_author_feed_ins_repost();

create function f_author_feed_del_repost() returns trigger
    language plpgsql as
$$
begin
    -- author_feeds has no "item" column; the reposted post's AT-URI is stored in "post"
    delete from author_feeds where did = OLD.did and post = OLD.post and typ = 'repost';
    return OLD;
end;
$$;

create trigger t_author_feed_del_repost
    before delete
    on reposts
    for each row
execute procedure f_author_feed_del_repost();
+16
parakeet-db/src/models.rs
··· 148 148 pub embed: Option<String>, 149 149 pub embed_subtype: Option<String>, 150 150 151 + pub mentions: Option<Vec<Option<String>>>, 152 + pub violates_threadgate: bool, 153 + 151 154 pub created_at: DateTime<Utc>, 152 155 pub indexed_at: NaiveDateTime, 153 156 } ··· 414 417 pub subject_type: &'a str, 415 418 pub tags: Vec<String>, 416 419 } 420 + 421 + #[derive(Debug, Queryable, Selectable, Identifiable)] 422 + #[diesel(table_name = crate::schema::author_feeds)] 423 + #[diesel(primary_key(uri))] 424 + #[diesel(check_for_backend(diesel::pg::Pg))] 425 + pub struct AuthorFeedItem { 426 + pub uri: String, 427 + pub cid: String, 428 + pub post: String, 429 + pub did: String, 430 + pub typ: String, 431 + pub sort_at: DateTime<Utc>, 432 + }
+14
parakeet-db/src/schema.rs
··· 13 13 } 14 14 15 15 diesel::table! { 16 + author_feeds (uri) { 17 + uri -> Text, 18 + cid -> Text, 19 + post -> Text, 20 + did -> Text, 21 + typ -> Text, 22 + sort_at -> Timestamptz, 23 + } 24 + } 25 + 26 + diesel::table! { 16 27 backfill (repo, repo_ver) { 17 28 repo -> Text, 18 29 repo_ver -> Text, ··· 284 295 embed_subtype -> Nullable<Text>, 285 296 created_at -> Timestamptz, 286 297 indexed_at -> Timestamp, 298 + mentions -> Nullable<Array<Nullable<Text>>>, 299 + violates_threadgate -> Bool, 287 300 } 288 301 } 289 302 ··· 429 442 430 443 diesel::allow_tables_to_appear_in_same_query!( 431 444 actors, 445 + author_feeds, 432 446 backfill, 433 447 backfill_jobs, 434 448 blocks,
+159 -80
parakeet/src/hydration/posts.rs
··· 3 3 use lexica::app_bsky::actor::ProfileViewBasic; 4 4 use lexica::app_bsky::embed::Embed; 5 5 use lexica::app_bsky::feed::{ 6 - BlockedAuthor, FeedViewPost, PostView, PostViewerState, ReplyRef, ReplyRefPost, ThreadgateView, 6 + BlockedAuthor, FeedReasonRepost, FeedViewPost, FeedViewPostReason, PostView, PostViewerState, 7 + ReplyRef, ReplyRefPost, ThreadgateView, 7 8 }; 8 9 use lexica::app_bsky::graph::ListViewBasic; 9 10 use lexica::app_bsky::RecordStats; ··· 32 33 } 33 34 } 34 35 36 + type HydratePostsRet = ( 37 + models::Post, 38 + ProfileViewBasic, 39 + Vec<models::Label>, 40 + Option<Embed>, 41 + Option<ThreadgateView>, 42 + Option<PostViewerState>, 43 + Option<PostStats>, 44 + ); 45 + 35 46 fn build_postview( 36 - post: models::Post, 37 - author: ProfileViewBasic, 38 - labels: Vec<models::Label>, 39 - embed: Option<Embed>, 40 - threadgate: Option<ThreadgateView>, 41 - viewer: Option<PostViewerState>, 42 - stats: Option<PostStats>, 47 + (post, author, labels, embed, threadgate, viewer, stats): HydratePostsRet, 43 48 ) -> PostView { 44 49 let stats = stats 45 50 .map(|stats| RecordStats { ··· 135 140 let threadgate = self.hydrate_threadgate(threadgate).await; 136 141 let labels = self.get_label(&post.at_uri).await; 137 142 138 - Some(build_postview( 143 + Some(build_postview(( 139 144 post, author, labels, embed, threadgate, viewer, stats, 140 - )) 145 + ))) 141 146 } 142 147 143 - pub async fn hydrate_posts(&self, posts: Vec<String>) -> HashMap<String, PostView> { 148 + async fn hydrate_posts_inner(&self, posts: Vec<String>) -> HashMap<String, HydratePostsRet> { 144 149 let stats = self.loaders.post_stats.load_many(posts.clone()).await; 145 150 let posts = self.loaders.posts.load_many(posts).await; 146 151 ··· 150 155 .unzip::<_, _, Vec<_>, Vec<_>>(); 151 156 let authors = self.hydrate_profiles_basic(authors).await; 152 157 153 - let post_labels = self.get_label_many(&post_uris).await; 154 - let viewer_data = self.get_post_viewer_states(&post_uris).await; 
158 + let mut post_labels = self.get_label_many(&post_uris).await; 159 + let mut viewer_data = self.get_post_viewer_states(&post_uris).await; 155 160 156 161 let threadgates = posts 157 162 .values() ··· 159 164 .collect(); 160 165 let threadgates = self.hydrate_threadgates(threadgates).await; 161 166 162 - let embeds = self.hydrate_embeds(post_uris).await; 167 + let mut embeds = self.hydrate_embeds(post_uris).await; 163 168 164 169 posts 165 170 .into_iter() 166 171 .filter_map(|(uri, (post, threadgate))| { 167 - let author = authors.get(&post.did)?; 168 - let embed = embeds.get(&uri).cloned(); 172 + let author = authors.get(&post.did)?.clone(); 173 + let embed = embeds.remove(&uri); 169 174 let threadgate = threadgate.and_then(|tg| threadgates.get(&tg.at_uri).cloned()); 170 - let labels = post_labels.get(&uri).cloned().unwrap_or_default(); 175 + let labels = post_labels.remove(&uri).unwrap_or_default(); 171 176 let stats = stats.get(&uri).cloned(); 172 - let viewer = viewer_data.get(&uri).cloned(); 177 + let viewer = viewer_data.remove(&uri); 173 178 174 179 Some(( 175 180 uri, 176 - build_postview( 177 - post, 178 - author.to_owned(), 179 - labels, 180 - embed, 181 - threadgate, 182 - viewer, 183 - stats, 184 - ), 181 + (post, author, labels, embed, threadgate, viewer, stats), 185 182 )) 186 183 }) 187 184 .collect() 188 185 } 189 186 190 - pub async fn hydrate_feed_posts(&self, posts: Vec<String>) -> HashMap<String, FeedViewPost> { 191 - let stats = self.loaders.post_stats.load_many(posts.clone()).await; 192 - let posts = self.loaders.posts.load_many(posts).await; 193 - 194 - let (authors, post_uris) = posts 195 - .values() 196 - .map(|(post, _)| (post.did.clone(), post.at_uri.clone())) 197 - .unzip::<_, _, Vec<_>, Vec<_>>(); 198 - let authors = self.hydrate_profiles_basic(authors).await; 187 + pub async fn hydrate_posts(&self, posts: Vec<String>) -> HashMap<String, PostView> { 188 + self.hydrate_posts_inner(posts) 189 + .await 190 + .into_iter() 191 + 
.map(|(uri, data)| (uri, build_postview(data))) 192 + .collect() 193 + } 199 194 200 - let post_labels = self.get_label_many(&post_uris).await; 201 - let viewer_data = self.get_post_viewer_states(&post_uris).await; 202 - let embeds = self.hydrate_embeds(post_uris).await; 195 + pub async fn hydrate_feed_posts( 196 + &self, 197 + posts: Vec<RawFeedItem>, 198 + author_threads_only: bool, 199 + ) -> Vec<FeedViewPost> { 200 + let post_uris = posts 201 + .iter() 202 + .map(|item| item.post_uri().to_string()) 203 + .collect::<Vec<_>>(); 204 + let mut posts_hyd = self.hydrate_posts_inner(post_uris).await; 203 205 204 - let reply_refs = posts 206 + // we shouldn't show the parent when the post violates a threadgate. 207 + let reply_refs = posts_hyd 205 208 .values() 206 - .flat_map(|(post, _)| [post.parent_uri.clone(), post.root_uri.clone()]) 209 + .filter(|(post, ..)| !post.violates_threadgate) 210 + .flat_map(|(post, ..)| [post.parent_uri.clone(), post.root_uri.clone()]) 207 211 .flatten() 208 212 .collect::<Vec<_>>(); 209 - 210 213 let reply_posts = self.hydrate_posts(reply_refs).await; 211 214 215 + let repost_profiles = posts 216 + .iter() 217 + .filter_map(|item| item.repost_by()) 218 + .collect::<Vec<_>>(); 219 + let profiles_hydrated = self.hydrate_profiles_basic(repost_profiles).await; 220 + 212 221 posts 213 222 .into_iter() 214 - .filter_map(|(post_uri, (post, _))| { 215 - let author = authors.get(&post.did)?; 223 + .filter_map(|item| { 224 + let post = posts_hyd.remove(item.post_uri())?; 225 + let context = item.context(); 226 + 227 + let reply = if let RawFeedItem::Post { .. 
} = item { 228 + let root_uri = post.0.root_uri.as_ref(); 229 + let parent_uri = post.0.parent_uri.as_ref(); 216 230 217 - let root = post.root_uri.as_ref().and_then(|uri| reply_posts.get(uri)); 218 - let parent = post 219 - .parent_uri 220 - .as_ref() 221 - .and_then(|uri| reply_posts.get(uri)); 231 + let (root, parent) = if author_threads_only { 232 + if root_uri.is_some() && parent_uri.is_some() { 233 + let root = root_uri.and_then(|uri| posts_hyd.get(uri))?; 234 + let parent = parent_uri.and_then(|uri| posts_hyd.get(uri))?; 235 + 236 + let root = build_postview(root.clone()); 237 + let parent = build_postview(parent.clone()); 238 + 239 + (Some(root), Some(parent)) 240 + } else { 241 + (None, None) 242 + } 243 + } else { 244 + let root = root_uri.and_then(|uri| reply_posts.get(uri)).cloned(); 245 + let parent = parent_uri.and_then(|uri| reply_posts.get(uri)).cloned(); 246 + 247 + (root, parent) 248 + }; 222 249 223 - let reply = if post.parent_uri.is_some() && post.root_uri.is_some() { 224 - Some(ReplyRef { 225 - root: root.cloned().map(postview_to_replyref).unwrap_or( 226 - ReplyRefPost::NotFound { 227 - uri: post.root_uri.as_ref().unwrap().clone(), 228 - not_found: true, 229 - }, 230 - ), 231 - parent: parent.cloned().map(postview_to_replyref).unwrap_or( 232 - ReplyRefPost::NotFound { 233 - uri: post.parent_uri.as_ref().unwrap().clone(), 234 - not_found: true, 235 - }, 236 - ), 237 - grandparent_author: None, 238 - }) 250 + if root_uri.is_some() || parent_uri.is_some() { 251 + Some(ReplyRef { 252 + root: root.map(postview_to_replyref).unwrap_or( 253 + ReplyRefPost::NotFound { 254 + uri: root_uri.unwrap().to_owned(), 255 + not_found: true, 256 + }, 257 + ), 258 + parent: parent.map(postview_to_replyref).unwrap_or( 259 + ReplyRefPost::NotFound { 260 + uri: parent_uri.unwrap().to_owned(), 261 + not_found: true, 262 + }, 263 + ), 264 + grandparent_author: None, 265 + }) 266 + } else { 267 + None 268 + } 239 269 } else { 240 270 None 241 271 }; 242 272 243 - let 
embed = embeds.get(&post_uri).cloned(); 244 - let labels = post_labels.get(&post_uri).cloned().unwrap_or_default(); 245 - let stats = stats.get(&post_uri).cloned(); 246 - let viewer = viewer_data.get(&post_uri).cloned(); 247 - let post = 248 - build_postview(post, author.to_owned(), labels, embed, None, viewer, stats); 273 + let reason = match item { 274 + RawFeedItem::Repost { uri, by, at, .. } => { 275 + Some(FeedViewPostReason::Repost(FeedReasonRepost { 276 + by: profiles_hydrated.get(&by).cloned()?, 277 + uri: Some(uri), 278 + cid: None, 279 + indexed_at: at, 280 + })) 281 + } 282 + RawFeedItem::Pin { .. } => Some(FeedViewPostReason::Pin), 283 + _ => None, 284 + }; 285 + 286 + let post = build_postview(post); 249 287 250 - Some(( 251 - post_uri, 252 - FeedViewPost { 253 - post, 254 - reply, 255 - reason: None, 256 - feed_context: None, 257 - }, 258 - )) 288 + Some(FeedViewPost { 289 + post, 290 + reply, 291 + reason, 292 + feed_context: context, 293 + }) 259 294 }) 260 295 .collect() 261 296 } ··· 299 334 _ => ReplyRefPost::Post(post), 300 335 } 301 336 } 337 + 338 + #[derive(Debug)] 339 + pub enum RawFeedItem { 340 + Pin { 341 + uri: String, 342 + context: Option<String>, 343 + }, 344 + Post { 345 + uri: String, 346 + context: Option<String>, 347 + }, 348 + Repost { 349 + uri: String, 350 + post: String, 351 + by: String, 352 + at: chrono::DateTime<chrono::Utc>, 353 + context: Option<String>, 354 + }, 355 + } 356 + 357 + impl RawFeedItem { 358 + fn post_uri(&self) -> &str { 359 + match self { 360 + RawFeedItem::Pin { uri, .. } => uri, 361 + RawFeedItem::Post { uri, .. } => uri, 362 + RawFeedItem::Repost { post, .. } => post, 363 + } 364 + } 365 + 366 + fn repost_by(&self) -> Option<String> { 367 + match self { 368 + RawFeedItem::Repost { by, .. } => Some(by.clone()), 369 + _ => None, 370 + } 371 + } 372 + 373 + fn context(&self) -> Option<String> { 374 + match self { 375 + RawFeedItem::Pin { context, .. 
} => context.clone(), 376 + RawFeedItem::Post { context, .. } => context.clone(), 377 + RawFeedItem::Repost { context, .. } => context.clone(), 378 + } 379 + } 380 + }
+2 -2
parakeet/src/sql/thread.sql
-- Descendants of a post: collects the replies beneath the post whose URI is $1.
-- Direct replies get depth 0. The walk continues from every collected row whose depth
-- is <= $2, so the deepest rows returned have depth $2 + 1.
-- Replies flagged violates_threadgate are skipped; since the recursion only joins onto
-- rows already collected, nothing beneath a skipped reply is reached either.
-- Rows come back deepest first.
WITH RECURSIVE thread AS (
    SELECT at_uri, parent_uri, root_uri, 0 AS depth
    FROM posts
    WHERE parent_uri = $1
      AND violates_threadgate = FALSE
    UNION ALL
    SELECT child.at_uri, child.parent_uri, child.root_uri, thread.depth + 1
    FROM posts child
    JOIN thread ON child.parent_uri = thread.at_uri
    WHERE thread.depth <= $2
      AND child.violates_threadgate = FALSE
)
SELECT *
FROM thread
ORDER BY depth DESC;
+4 -2
parakeet/src/sql/thread_parent.sql
-- Ancestors of the post whose URI is $1.
-- The anchor row (depth 0) is the post's direct parent. It is only looked up when the
-- post itself is not flagged violates_threadgate; otherwise the scalar subquery yields
-- NULL and the result is empty.
-- Each step then follows parent_uri one level up from rows whose depth is <= $2, so the
-- highest rows returned have depth $2 + 1. A flagged ancestor is skipped, which also
-- ends the walk at that point.
-- NOTE(review): the anchor row itself is not checked for violates_threadgate, unlike
-- the rows added by the recursive step — confirm this is intended.
-- Rows come back furthest ancestor first.
WITH RECURSIVE parents AS (
    SELECT at_uri, cid, parent_uri, root_uri, 0 AS depth
    FROM posts
    WHERE at_uri = (
        SELECT parent_uri
        FROM posts
        WHERE at_uri = $1
          AND violates_threadgate = FALSE
    )
    UNION ALL
    SELECT ancestor.at_uri, ancestor.cid, ancestor.parent_uri, ancestor.root_uri, parents.depth + 1
    FROM posts ancestor
    JOIN parents ON ancestor.at_uri = parents.parent_uri
    WHERE parents.depth <= $2
      AND ancestor.violates_threadgate = FALSE
)
SELECT *
FROM parents
ORDER BY depth DESC;
+7 -8
parakeet/src/xrpc/app_bsky/feed/likes.rs
··· 1 + use crate::hydration::posts::RawFeedItem; 1 2 use crate::hydration::StatefulHydrator; 2 3 use crate::xrpc::error::{Error, XrpcResult}; 3 4 use crate::xrpc::extract::{AtpAcceptLabelers, AtpAuth}; ··· 57 58 .last() 58 59 .map(|(last, _)| last.timestamp_millis().to_string()); 59 60 60 - let at_uris = results 61 + let raw_feed = results 61 62 .iter() 62 - .map(|(_, uri)| uri.clone()) 63 + .map(|(_, uri)| RawFeedItem::Post { 64 + uri: uri.clone(), 65 + context: None, 66 + }) 63 67 .collect::<Vec<_>>(); 64 68 65 - let mut posts = hyd.hydrate_feed_posts(at_uris).await; 66 - 67 - let feed: Vec<_> = results 68 - .into_iter() 69 - .filter_map(|(_, uri)| posts.remove(&uri)) 70 - .collect(); 69 + let feed = hyd.hydrate_feed_posts(raw_feed, false).await; 71 70 72 71 Ok(Json(FeedRes { cursor, feed })) 73 72 }
+83 -92
parakeet/src/xrpc/app_bsky/feed/posts.rs
··· 1 + use crate::hydration::posts::RawFeedItem; 1 2 use crate::hydration::StatefulHydrator; 2 3 use crate::xrpc::app_bsky::graph::lists::ListWithCursorQuery; 3 4 use crate::xrpc::error::{Error, XrpcResult}; ··· 16 17 use diesel_async::{AsyncPgConnection, RunQueryDsl}; 17 18 use lexica::app_bsky::actor::ProfileView; 18 19 use lexica::app_bsky::feed::{ 19 - BlockedAuthor, FeedReasonRepost, FeedSkeletonResponse, FeedViewPost, FeedViewPostReason, 20 - PostView, SkeletonReason, ThreadViewPost, ThreadViewPostType, ThreadgateView, 20 + BlockedAuthor, FeedSkeletonResponse, FeedViewPost, PostView, SkeletonReason, ThreadViewPost, 21 + ThreadViewPostType, ThreadgateView, 21 22 }; 22 - use parakeet_db::schema; 23 + use parakeet_db::{models, schema}; 23 24 use reqwest::Url; 24 25 use serde::{Deserialize, Serialize}; 25 26 use std::collections::HashMap; ··· 113 114 114 115 let hyd = StatefulHydrator::new(&state.dataloaders, &state.cdn, &labelers, maybe_auth); 115 116 116 - let at_uris = skeleton.feed.iter().map(|v| v.post.clone()).collect(); 117 117 let repost_skeleton = skeleton 118 118 .feed 119 119 .iter() ··· 122 122 _ => None, 123 123 }) 124 124 .collect::<Vec<_>>(); 125 + let mut repost_data = get_skeleton_repost_data(&mut conn, repost_skeleton).await; 125 126 126 - let mut posts = hyd.hydrate_feed_posts(at_uris).await; 127 - let mut repost_data = get_skeleton_repost_data(&mut conn, &hyd, repost_skeleton).await; 128 - 129 - let feed = skeleton 127 + let raw_feed = skeleton 130 128 .feed 131 129 .into_iter() 132 - .filter_map(|item| { 133 - let mut post = posts.remove(&item.post)?; 134 - let reason = match item.reason { 135 - Some(SkeletonReason::Repost { repost }) => { 136 - repost_data.remove(&repost).map(FeedViewPostReason::Repost) 137 - } 138 - Some(SkeletonReason::Pin {}) => Some(FeedViewPostReason::Pin), 139 - _ => None, 140 - }; 141 - 142 - post.reason = reason; 143 - post.feed_context = item.feed_context; 144 - 145 - Some(post) 130 + .filter_map(|v| match v.reason 
{ 131 + Some(SkeletonReason::Repost { repost }) => { 132 + repost_data 133 + .remove_entry(&repost) 134 + .map(|(uri, (by, at))| RawFeedItem::Repost { 135 + uri, 136 + post: v.post, 137 + by, 138 + at: at.and_utc(), 139 + context: v.feed_context, 140 + }) 141 + } 142 + Some(SkeletonReason::Pin {}) => Some(RawFeedItem::Pin { 143 + uri: v.post, 144 + context: v.feed_context, 145 + }), 146 + None => Some(RawFeedItem::Post { 147 + uri: v.post, 148 + context: v.feed_context, 149 + }), 146 150 }) 147 151 .collect(); 148 152 153 + let feed = hyd.hydrate_feed_posts(raw_feed, false).await; 154 + 149 155 Ok(Json(FeedRes { 150 156 cursor: skeleton.cursor, 151 157 feed, 152 158 })) 153 159 } 154 160 155 - #[derive(Debug, Deserialize)] 161 + #[derive(Debug, Default, Eq, PartialEq, Deserialize)] 156 162 #[serde(rename_all = "snake_case")] 157 163 pub enum GetAuthorFeedFilter { 164 + #[default] 158 165 PostsWithReplies, 159 166 PostsNoReplies, 160 167 PostsWithMedia, 161 168 PostsAndAuthorThreads, 162 169 PostsWithVideo, 163 - } 164 - 165 - impl Default for GetAuthorFeedFilter { 166 - fn default() -> Self { 167 - Self::PostsWithReplies 168 - } 169 170 } 170 171 171 172 #[derive(Debug, Deserialize)] ··· 209 210 210 211 let pin = match query.include_pins && query.cursor.is_none() { 211 212 false => None, 212 - true => match crate::db::get_pinned_post_uri(&mut conn, &did).await? 
{ 213 - Some(post) => hyd.hydrate_post(post).await, 214 - None => None, 215 - }, 213 + true => crate::db::get_pinned_post_uri(&mut conn, &did).await?, 216 214 }; 217 215 218 216 let limit = query.limit.unwrap_or(50).clamp(1, 100); 219 217 220 - let mut posts_query = schema::posts::table 221 - .select((schema::posts::created_at, schema::posts::at_uri)) 222 - .filter(schema::posts::did.eq(did)) 218 + let mut posts_query = schema::author_feeds::table 219 + .select(models::AuthorFeedItem::as_select()) 220 + .left_join(schema::posts::table.on(schema::posts::at_uri.eq(schema::author_feeds::post))) 221 + .filter(schema::author_feeds::did.eq(&did)) 223 222 .into_boxed(); 224 223 225 224 if let Some(cursor) = datetime_cursor(query.cursor.as_ref()) { 226 - posts_query = posts_query.filter(schema::posts::created_at.lt(cursor)); 225 + posts_query = posts_query.filter(schema::author_feeds::sort_at.lt(cursor)); 227 226 } 228 227 228 + let author_threads_only = query.filter == GetAuthorFeedFilter::PostsAndAuthorThreads; 229 229 posts_query = match query.filter { 230 - GetAuthorFeedFilter::PostsWithReplies => posts_query, 230 + GetAuthorFeedFilter::PostsWithReplies => { 231 + posts_query.filter(schema::author_feeds::typ.eq("post")) 232 + } 231 233 GetAuthorFeedFilter::PostsNoReplies => { 232 234 posts_query.filter(schema::posts::parent_uri.is_null()) 233 235 } 234 - GetAuthorFeedFilter::PostsWithMedia => posts_query.filter(embed_type_filter(&[ 235 - "app.bsky.embed.video", 236 - "app.bsky.embed.images", 237 - ])), 236 + GetAuthorFeedFilter::PostsWithMedia => posts_query.filter( 237 + embed_type_filter(&["app.bsky.embed.video", "app.bsky.embed.images"]) 238 + .and(schema::author_feeds::typ.eq("post")), 239 + ), 238 240 GetAuthorFeedFilter::PostsAndAuthorThreads => posts_query.filter( 239 241 (schema::posts::parent_uri 240 - .like(format!("at://{}/%", &query.actor)) 242 + .like(format!("at://{did}/%")) 241 243 .or(schema::posts::parent_uri.is_null())) 242 244 .and( 243 245 
schema::posts::root_uri 244 - .like(format!("at://{}/%", &query.actor)) 246 + .like(format!("at://{did}/%")) 245 247 .or(schema::posts::root_uri.is_null()), 246 248 ), 247 249 ), 248 - GetAuthorFeedFilter::PostsWithVideo => { 249 - posts_query.filter(embed_type_filter(&["app.bsky.embed.video"])) 250 - } 250 + GetAuthorFeedFilter::PostsWithVideo => posts_query.filter( 251 + embed_type_filter(&["app.bsky.embed.video"]).and(schema::author_feeds::typ.eq("post")), 252 + ), 251 253 }; 252 254 253 255 let results = posts_query 254 - .order(schema::posts::created_at.desc()) 256 + .order(schema::author_feeds::sort_at.desc()) 255 257 .limit(limit as i64) 256 - .load::<(chrono::DateTime<chrono::Utc>, String)>(&mut conn) 258 + .load(&mut conn) 257 259 .await?; 258 260 259 261 let cursor = results 260 262 .last() 261 - .map(|(last, _)| last.timestamp_millis().to_string()); 263 + .map(|item| item.sort_at.timestamp_millis().to_string()); 262 264 263 - let at_uris = results 264 - .iter() 265 - .map(|(_, uri)| uri.clone()) 266 - .collect::<Vec<_>>(); 267 - 268 - let mut posts = hyd.hydrate_feed_posts(at_uris).await; 269 - 270 - let mut feed: Vec<_> = results 265 + let mut raw_feed = results 271 266 .into_iter() 272 - .filter_map(|(_, uri)| posts.remove(&uri)) 273 - .collect(); 267 + .filter_map(|item| match &*item.typ { 268 + "post" => Some(RawFeedItem::Post { 269 + uri: item.post, 270 + context: None, 271 + }), 272 + "repost" => Some(RawFeedItem::Repost { 273 + uri: item.uri, 274 + post: item.post, 275 + by: item.did, 276 + at: item.sort_at, 277 + context: None, 278 + }), 279 + _ => None, 280 + }) 281 + .collect::<Vec<_>>(); 274 282 275 283 if let Some(post) = pin { 276 - feed.insert( 284 + raw_feed.insert( 277 285 0, 278 - FeedViewPost { 279 - post, 280 - reply: None, 281 - reason: Some(FeedViewPostReason::Pin), 282 - feed_context: None, 286 + RawFeedItem::Pin { 287 + uri: post, 288 + context: None, 283 289 }, 284 290 ); 285 291 } 292 + 293 + let feed = 
hyd.hydrate_feed_posts(raw_feed, author_threads_only).await; 286 294 287 295 Ok(Json(FeedRes { cursor, feed })) 288 296 } ··· 325 333 .last() 326 334 .map(|(last, _)| last.timestamp_millis().to_string()); 327 335 328 - let at_uris = results 336 + let raw_feed = results 329 337 .iter() 330 - .map(|(_, uri)| uri.clone()) 338 + .map(|(_, uri)| RawFeedItem::Post { 339 + uri: uri.clone(), 340 + context: None, 341 + }) 331 342 .collect::<Vec<_>>(); 332 343 333 - let mut posts = hyd.hydrate_feed_posts(at_uris).await; 334 - 335 - let feed = results 336 - .into_iter() 337 - .filter_map(|(_, uri)| posts.remove(&uri)) 338 - .collect(); 344 + let feed = hyd.hydrate_feed_posts(raw_feed, false).await; 339 345 340 346 Ok(Json(FeedRes { cursor, feed })) 341 347 } ··· 669 675 } 670 676 } 671 677 672 - async fn get_skeleton_repost_data<'a>( 678 + async fn get_skeleton_repost_data( 673 679 conn: &mut AsyncPgConnection, 674 - hyd: &StatefulHydrator<'a>, 675 680 reposts: Vec<String>, 676 - ) -> HashMap<String, FeedReasonRepost> { 681 + ) -> HashMap<String, (String, NaiveDateTime)> { 677 682 let Ok(repost_data) = schema::records::table 678 683 .select(( 679 684 schema::records::at_uri, ··· 687 692 return HashMap::new(); 688 693 }; 689 694 690 - let profiles = repost_data.iter().map(|(_, did, _)| did.clone()).collect(); 691 - let profiles = hyd.hydrate_profiles_basic(profiles).await; 692 - 693 695 repost_data 694 696 .into_iter() 695 - .filter_map(|(uri, did, indexed_at)| { 696 - let by = profiles.get(&did).cloned()?; 697 - 698 - let repost = FeedReasonRepost { 699 - by, 700 - uri: Some(uri.clone()), 701 - cid: None, // okay, we do have this, but the app doesn't seem to be bothered about not setting it. 702 - indexed_at: indexed_at.and_utc(), 703 - }; 704 - 705 - Some((uri, repost)) 706 - }) 697 + .map(|(uri, did, at)| (uri, (did, at))) 707 698 .collect() 708 699 } 709 700