//! Reference extraction from AT Protocol records //! //! This module extracts actor DIDs and other references from records that need //! to be resolved before the record can be processed. This includes: //! - Subject actors for Follow, Block, ListItem, Verification records //! - Referenced actors in posts (reply parents, quote embeds, mentions) //! - Referenced actors in likes and reposts use crate::relay::types::RecordTypes; use crate::types::records::{AppBskyEmbed, ThreadgateRule}; /// Information about references that need to be resolved from a record #[derive(Debug, Clone)] pub struct RecordReferences { /// The subject actor DID (for records that have a direct subject_actor_id FK) /// Returns Some(did) for: Follow, Block, ListItem, Verification pub subject_did: Option, /// Additional actor DIDs that need to exist (but don't have FK relationships in the record table) /// Includes: like/repost targets, threadgate list owners pub additional_dids: Vec, /// Post-specific references (for notification creation) /// Parent post author DID (from reply.parent) pub parent_author_did: Option, /// Root post author DID (from reply.root) pub root_author_did: Option, /// Quoted post author DID (from embed.record or embed.recordWithMedia) pub quoted_author_did: Option, /// Mentioned user DIDs (from facets) pub mentioned_dids: Vec, /// Via repost URI (for likes and reposts that came via a repost) /// This is the full AT-URI of the repost, e.g. "at://did:plc:xxx/app.bsky.feed.repost/3abc..." pub via_uri: Option, /// Via repost CID (from the StrongRef in the via field) /// This is the actual CID of the repost, needed to create the stub with the correct CID pub via_cid: Option, } impl RecordReferences { /// Create empty references pub fn empty() -> Self { Self { subject_did: None, additional_dids: Vec::new(), parent_author_did: None, root_author_did: None, quoted_author_did: None, mentioned_dids: Vec::new(), via_uri: None, via_cid: None, } } /// Create references with just a subject DID pub fn with_subject(subject_did: String) -> Self { Self { subject_did: Some(subject_did), additional_dids: Vec::new(), parent_author_did: None, root_author_did: None, quoted_author_did: None, mentioned_dids: Vec::new(), via_uri: None, via_cid: None, } } /// Create references with additional DIDs only pub fn with_additional(dids: Vec) -> Self { Self { subject_did: None, additional_dids: dids, parent_author_did: None, root_author_did: None, quoted_author_did: None, mentioned_dids: Vec::new(), via_uri: None, via_cid: None, } } /// Check if there are any references to resolve pub fn has_references(&self) -> bool { self.subject_did.is_some() || !self.additional_dids.is_empty() || self.parent_author_did.is_some() || self.root_author_did.is_some() || self.quoted_author_did.is_some() || !self.mentioned_dids.is_empty() } } /// Extract all actor references from a record /// /// This identifies which actors need to exist in the database before the record can be processed. /// The subject_did is returned for records that have a foreign key relationship (Follow, Block, etc.) /// Additional DIDs are returned for records that reference actors without direct FKs. pub fn extract_references(record: &RecordTypes) -> RecordReferences { match record { // Follow: subject is the followed actor RecordTypes::AppBskyGraphFollow(rec) => { RecordReferences::with_subject(rec.subject.clone()) } // Block: subject is the blocked actor RecordTypes::AppBskyGraphBlock(rec) => { RecordReferences::with_subject(rec.subject.clone()) } // ListItem: subject is the actor being added to the list RecordTypes::AppBskyGraphListItem(rec) => { RecordReferences::with_subject(rec.subject.clone()) } // Verification: subject is the verified actor RecordTypes::AppBskyGraphVerification(rec) => { RecordReferences::with_subject(rec.subject.to_string()) } // Like: extract author DID from liked post URI and via repost URI // NOTE: Likes don't have subject_actor_id FK in DB, but we need it for notifications RecordTypes::AppBskyFeedLike(rec) => { let mut refs = if let Some(subject_did) = parakeet_db::utils::at_uri::extract_did(rec.subject.uri.as_str()) { RecordReferences::with_subject(subject_did.to_string()) } else { RecordReferences::empty() }; // Extract via repost URI and CID (full StrongRef) if let Some(via_ref) = &rec.via { refs.via_uri = Some(via_ref.uri.to_string()); refs.via_cid = Some(via_ref.cid.to_string()); } refs } // Repost: extract author DID from reposted post URI and via repost URI // NOTE: Reposts don't have subject_actor_id FK in DB, but we need it for notifications RecordTypes::AppBskyFeedRepost(rec) => { let mut refs = if let Some(subject_did) = parakeet_db::utils::at_uri::extract_did(rec.subject.uri.as_str()) { RecordReferences::with_subject(subject_did.to_string()) } else { RecordReferences::empty() }; // Extract via repost URI and CID (full StrongRef) if let Some(via_ref) = &rec.via { refs.via_uri = Some(via_ref.uri.to_string()); refs.via_cid = Some(via_ref.cid.to_string()); } refs } // Post: extract DIDs from reply parents, quote embeds, and mentions RecordTypes::AppBskyFeedPost(rec) => { let mut refs = RecordReferences::empty(); // Extract reply parent and root authors (for notifications) if let Some(reply) = &rec.reply { refs.parent_author_did = parakeet_db::utils::at_uri::extract_did(&reply.parent.uri) .map(|s| s.to_string()); refs.root_author_did = parakeet_db::utils::at_uri::extract_did(&reply.root.uri) .map(|s| s.to_string()); } // Extract quote embed author (for notifications) if let Some(embed) = &rec.embed { if let Some(bsky_embed) = embed.as_bsky() { match bsky_embed { AppBskyEmbed::Record(record_embed) => { refs.quoted_author_did = parakeet_db::utils::at_uri::extract_did(&record_embed.record.uri) .map(|s| s.to_string()); } AppBskyEmbed::RecordWithMedia(rwm) => { refs.quoted_author_did = parakeet_db::utils::at_uri::extract_did(&rwm.record.uri) .map(|s| s.to_string()); } _ => {} } } } // Extract mentioned users from facets (for notifications) if let Some(facets) = &rec.facets { refs.mentioned_dids = crate::utils::extract_mentions_and_tags(facets).0; } refs } // ListBlock: extract list owner DID RecordTypes::AppBskyGraphListBlock(rec) => { let mut dids = Vec::new(); if let Some(list_did) = parakeet_db::utils::at_uri::extract_did(rec.subject.uri.as_str()) { dids.push(list_did.to_string()); } RecordReferences::with_additional(dids) } // Threadgate: extract list owner DIDs from list rules RecordTypes::AppBskyFeedThreadgate(rec) => { let mut dids = Vec::new(); if let Some(rules) = &rec.allow { for rule in rules { if let ThreadgateRule::List { list } = rule { if let Some(list_did) = parakeet_db::utils::at_uri::extract_did(list) { dids.push(list_did.to_string()); } } } } RecordReferences::with_additional(dids) } // FeedGenerator: extract service DID (the actor hosting the feed generator) RecordTypes::AppBskyFeedGenerator(rec) => { // Only include the service DID if it's non-empty after trimming let trimmed = rec.did.trim(); if trimmed.is_empty() { tracing::warn!( record_did = %rec.did, record_did_len = rec.did.len(), "FeedGenerator has empty service DID - skipping reference extraction" ); RecordReferences::empty() } else { RecordReferences::with_additional(vec![rec.did.clone()]) } } // Other record types don't have actor references _ => RecordReferences::empty(), } } #[cfg(test)] mod tests { use super::*; use crate::types::records::{ AppBskyActorProfile, AppBskyFeedLike, AppBskyGraphBlock, AppBskyGraphFollow, }; use lexica::StrongRef; #[test] fn test_follow_extracts_subject() { let record = RecordTypes::AppBskyGraphFollow(AppBskyGraphFollow { subject: "did:plc:test123".to_string(), created_at: chrono::Utc::now(), }); let refs = extract_references(&record); assert_eq!(refs.subject_did, Some("did:plc:test123".to_string())); assert!(refs.additional_dids.is_empty()); assert!(refs.has_references()); } #[test] fn test_block_extracts_subject() { let record = RecordTypes::AppBskyGraphBlock(AppBskyGraphBlock { subject: "did:plc:blocked".to_string(), created_at: chrono::Utc::now(), }); let refs = extract_references(&record); assert_eq!(refs.subject_did, Some("did:plc:blocked".to_string())); assert!(refs.additional_dids.is_empty()); } #[test] fn test_like_extracts_post_author() { use ipld_core::cid::Cid; use std::str::FromStr; let record = RecordTypes::AppBskyFeedLike(AppBskyFeedLike { subject: StrongRef { uri: "at://did:plc:author/app.bsky.feed.post/abc123".to_string(), cid: Cid::from_str("bafyreihxj5lhuip5iynyzqj6e4w2dzosfndbcvtqgwdfvpmzw4pj4v76fi") .unwrap(), }, created_at: chrono::Utc::now(), via: None, }); let refs = extract_references(&record); // Likes use subject_did for notification creation (even though DB has no FK) assert_eq!(refs.subject_did, Some("did:plc:author".to_string())); assert_eq!(refs.additional_dids, Vec::::new()); } #[test] fn test_profile_has_no_references() { let record = RecordTypes::AppBskyActorProfile(AppBskyActorProfile { display_name: Some("Test User".to_string()), description: None, avatar: None, banner: None, labels: None, created_at: None, joined_via_starter_pack: None, pinned_post: None, pronouns: None, website: None, }); let refs = extract_references(&record); assert_eq!(refs.subject_did, None); assert!(refs.additional_dids.is_empty()); assert!(!refs.has_references()); } }