Rust AppView - highly experimental!
1//! Reference extraction from AT Protocol records 2//! 3//! This module extracts actor DIDs and other references from records that need 4//! to be resolved before the record can be processed. This includes: 5//! - Subject actors for Follow, Block, ListItem, Verification records 6//! - Referenced actors in posts (reply parents, quote embeds, mentions) 7//! - Referenced actors in likes and reposts 8 9use crate::relay::types::RecordTypes; 10use crate::types::records::{AppBskyEmbed, ThreadgateRule}; 11 12/// Information about references that need to be resolved from a record 13#[derive(Debug, Clone)] 14pub struct RecordReferences { 15 /// The subject actor DID (for records that have a direct subject_actor_id FK) 16 /// Returns Some(did) for: Follow, Block, ListItem, Verification 17 pub subject_did: Option<String>, 18 19 /// Additional actor DIDs that need to exist (but don't have FK relationships in the record table) 20 /// Includes: like/repost targets, threadgate list owners 21 pub additional_dids: Vec<String>, 22 23 /// Post-specific references (for notification creation) 24 /// Parent post author DID (from reply.parent) 25 pub parent_author_did: Option<String>, 26 27 /// Root post author DID (from reply.root) 28 pub root_author_did: Option<String>, 29 30 /// Quoted post author DID (from embed.record or embed.recordWithMedia) 31 pub quoted_author_did: Option<String>, 32 33 /// Mentioned user DIDs (from facets) 34 pub mentioned_dids: Vec<String>, 35 36 /// Via repost URI (for likes and reposts that came via a repost) 37 /// This is the full AT-URI of the repost, e.g. "at://did:plc:xxx/app.bsky.feed.repost/3abc..." 38 pub via_uri: Option<String>, 39 40 /// Via repost CID (from the StrongRef in the via field) 41 /// This is the actual CID of the repost, needed to create the stub with the correct CID 42 pub via_cid: Option<String>, 43} 44 45impl RecordReferences { 46 /// Create empty references 47 pub fn empty() -> Self { 48 Self { 49 subject_did: None, 50 additional_dids: Vec::new(), 51 parent_author_did: None, 52 root_author_did: None, 53 quoted_author_did: None, 54 mentioned_dids: Vec::new(), 55 via_uri: None, 56 via_cid: None, 57 } 58 } 59 60 /// Create references with just a subject DID 61 pub fn with_subject(subject_did: String) -> Self { 62 Self { 63 subject_did: Some(subject_did), 64 additional_dids: Vec::new(), 65 parent_author_did: None, 66 root_author_did: None, 67 quoted_author_did: None, 68 mentioned_dids: Vec::new(), 69 via_uri: None, 70 via_cid: None, 71 } 72 } 73 74 /// Create references with additional DIDs only 75 pub fn with_additional(dids: Vec<String>) -> Self { 76 Self { 77 subject_did: None, 78 additional_dids: dids, 79 parent_author_did: None, 80 root_author_did: None, 81 quoted_author_did: None, 82 mentioned_dids: Vec::new(), 83 via_uri: None, 84 via_cid: None, 85 } 86 } 87 88 /// Check if there are any references to resolve 89 pub fn has_references(&self) -> bool { 90 self.subject_did.is_some() 91 || !self.additional_dids.is_empty() 92 || self.parent_author_did.is_some() 93 || self.root_author_did.is_some() 94 || self.quoted_author_did.is_some() 95 || !self.mentioned_dids.is_empty() 96 } 97} 98 99/// Extract all actor references from a record 100/// 101/// This identifies which actors need to exist in the database before the record can be processed. 102/// The subject_did is returned for records that have a foreign key relationship (Follow, Block, etc.) 103/// Additional DIDs are returned for records that reference actors without direct FKs. 104pub fn extract_references(record: &RecordTypes) -> RecordReferences { 105 match record { 106 // Follow: subject is the followed actor 107 RecordTypes::AppBskyGraphFollow(rec) => { 108 RecordReferences::with_subject(rec.subject.clone()) 109 } 110 111 // Block: subject is the blocked actor 112 RecordTypes::AppBskyGraphBlock(rec) => { 113 RecordReferences::with_subject(rec.subject.clone()) 114 } 115 116 // ListItem: subject is the actor being added to the list 117 RecordTypes::AppBskyGraphListItem(rec) => { 118 RecordReferences::with_subject(rec.subject.clone()) 119 } 120 121 // Verification: subject is the verified actor 122 RecordTypes::AppBskyGraphVerification(rec) => { 123 RecordReferences::with_subject(rec.subject.to_string()) 124 } 125 126 // Like: extract author DID from liked post URI and via repost URI 127 // NOTE: Likes don't have subject_actor_id FK in DB, but we need it for notifications 128 RecordTypes::AppBskyFeedLike(rec) => { 129 let mut refs = if let Some(subject_did) = parakeet_db::utils::at_uri::extract_did(rec.subject.uri.as_str()) { 130 RecordReferences::with_subject(subject_did.to_string()) 131 } else { 132 RecordReferences::empty() 133 }; 134 135 // Extract via repost URI and CID (full StrongRef) 136 if let Some(via_ref) = &rec.via { 137 refs.via_uri = Some(via_ref.uri.to_string()); 138 refs.via_cid = Some(via_ref.cid.to_string()); 139 } 140 141 refs 142 } 143 144 // Repost: extract author DID from reposted post URI and via repost URI 145 // NOTE: Reposts don't have subject_actor_id FK in DB, but we need it for notifications 146 RecordTypes::AppBskyFeedRepost(rec) => { 147 let mut refs = if let Some(subject_did) = parakeet_db::utils::at_uri::extract_did(rec.subject.uri.as_str()) { 148 RecordReferences::with_subject(subject_did.to_string()) 149 } else { 150 RecordReferences::empty() 151 }; 152 153 // Extract via repost URI and CID (full StrongRef) 154 if let Some(via_ref) = &rec.via { 155 refs.via_uri = Some(via_ref.uri.to_string()); 156 refs.via_cid = Some(via_ref.cid.to_string()); 157 } 158 159 refs 160 } 161 162 // Post: extract DIDs from reply parents, quote embeds, and mentions 163 RecordTypes::AppBskyFeedPost(rec) => { 164 let mut refs = RecordReferences::empty(); 165 166 // Extract reply parent and root authors (for notifications) 167 if let Some(reply) = &rec.reply { 168 refs.parent_author_did = parakeet_db::utils::at_uri::extract_did(&reply.parent.uri) 169 .map(|s| s.to_string()); 170 refs.root_author_did = parakeet_db::utils::at_uri::extract_did(&reply.root.uri) 171 .map(|s| s.to_string()); 172 } 173 174 // Extract quote embed author (for notifications) 175 if let Some(embed) = &rec.embed { 176 if let Some(bsky_embed) = embed.as_bsky() { 177 match bsky_embed { 178 AppBskyEmbed::Record(record_embed) => { 179 refs.quoted_author_did = 180 parakeet_db::utils::at_uri::extract_did(&record_embed.record.uri) 181 .map(|s| s.to_string()); 182 } 183 AppBskyEmbed::RecordWithMedia(rwm) => { 184 refs.quoted_author_did = 185 parakeet_db::utils::at_uri::extract_did(&rwm.record.uri) 186 .map(|s| s.to_string()); 187 } 188 _ => {} 189 } 190 } 191 } 192 193 // Extract mentioned users from facets (for notifications) 194 if let Some(facets) = &rec.facets { 195 refs.mentioned_dids = crate::utils::extract_mentions_and_tags(facets).0; 196 } 197 198 refs 199 } 200 201 // ListBlock: extract list owner DID 202 RecordTypes::AppBskyGraphListBlock(rec) => { 203 let mut dids = Vec::new(); 204 if let Some(list_did) = parakeet_db::utils::at_uri::extract_did(rec.subject.uri.as_str()) { 205 dids.push(list_did.to_string()); 206 } 207 RecordReferences::with_additional(dids) 208 } 209 210 // Threadgate: extract list owner DIDs from list rules 211 RecordTypes::AppBskyFeedThreadgate(rec) => { 212 let mut dids = Vec::new(); 213 if let Some(rules) = &rec.allow { 214 for rule in rules { 215 if let ThreadgateRule::List { list } = rule { 216 if let Some(list_did) = parakeet_db::utils::at_uri::extract_did(list) { 217 dids.push(list_did.to_string()); 218 } 219 } 220 } 221 } 222 RecordReferences::with_additional(dids) 223 } 224 225 // FeedGenerator: extract service DID (the actor hosting the feed generator) 226 RecordTypes::AppBskyFeedGenerator(rec) => { 227 // Only include the service DID if it's non-empty after trimming 228 let trimmed = rec.did.trim(); 229 if trimmed.is_empty() { 230 tracing::warn!( 231 record_did = %rec.did, 232 record_did_len = rec.did.len(), 233 "FeedGenerator has empty service DID - skipping reference extraction" 234 ); 235 RecordReferences::empty() 236 } else { 237 RecordReferences::with_additional(vec![rec.did.clone()]) 238 } 239 } 240 241 // Other record types don't have actor references 242 _ => RecordReferences::empty(), 243 } 244} 245 246#[cfg(test)] 247mod tests { 248 use super::*; 249 use crate::types::records::{ 250 AppBskyActorProfile, AppBskyFeedLike, AppBskyGraphBlock, AppBskyGraphFollow, 251 }; 252 use lexica::StrongRef; 253 254 #[test] 255 fn test_follow_extracts_subject() { 256 let record = RecordTypes::AppBskyGraphFollow(AppBskyGraphFollow { 257 subject: "did:plc:test123".to_string(), 258 created_at: chrono::Utc::now(), 259 }); 260 261 let refs = extract_references(&record); 262 assert_eq!(refs.subject_did, Some("did:plc:test123".to_string())); 263 assert!(refs.additional_dids.is_empty()); 264 assert!(refs.has_references()); 265 } 266 267 #[test] 268 fn test_block_extracts_subject() { 269 let record = RecordTypes::AppBskyGraphBlock(AppBskyGraphBlock { 270 subject: "did:plc:blocked".to_string(), 271 created_at: chrono::Utc::now(), 272 }); 273 274 let refs = extract_references(&record); 275 assert_eq!(refs.subject_did, Some("did:plc:blocked".to_string())); 276 assert!(refs.additional_dids.is_empty()); 277 } 278 279 #[test] 280 fn test_like_extracts_post_author() { 281 use ipld_core::cid::Cid; 282 use std::str::FromStr; 283 284 let record = RecordTypes::AppBskyFeedLike(AppBskyFeedLike { 285 subject: StrongRef { 286 uri: "at://did:plc:author/app.bsky.feed.post/abc123".to_string(), 287 cid: Cid::from_str("bafyreihxj5lhuip5iynyzqj6e4w2dzosfndbcvtqgwdfvpmzw4pj4v76fi") 288 .unwrap(), 289 }, 290 created_at: chrono::Utc::now(), 291 via: None, 292 }); 293 294 let refs = extract_references(&record); 295 // Likes use subject_did for notification creation (even though DB has no FK) 296 assert_eq!(refs.subject_did, Some("did:plc:author".to_string())); 297 assert_eq!(refs.additional_dids, Vec::<String>::new()); 298 } 299 300 #[test] 301 fn test_profile_has_no_references() { 302 let record = RecordTypes::AppBskyActorProfile(AppBskyActorProfile { 303 display_name: Some("Test User".to_string()), 304 description: None, 305 avatar: None, 306 banner: None, 307 labels: None, 308 created_at: None, 309 joined_via_starter_pack: None, 310 pinned_post: None, 311 pronouns: None, 312 website: None, 313 }); 314 315 let refs = extract_references(&record); 316 assert_eq!(refs.subject_did, None); 317 assert!(refs.additional_dids.is_empty()); 318 assert!(!refs.has_references()); 319 } 320}