//! Reference extraction from AT Protocol records
//!
//! This module extracts actor DIDs and other references from records that need
//! to be resolved before the record can be processed. This includes:
//! - Subject actors for Follow, Block, ListItem, Verification records
//! - Referenced actors in posts (reply parents, reply roots, quote embeds, mentions)
//! - Referenced actors in likes and reposts (post author, plus the `via` repost reference)
//! - List owners for ListBlock and Threadgate records, and the service DID of FeedGenerator records
8
9use crate::relay::types::RecordTypes;
10use crate::types::records::{AppBskyEmbed, ThreadgateRule};
11
/// Information about references that need to be resolved from a record.
///
/// Values are produced by `extract_references`. The derived `Default` is the
/// "no references" value (every `Option` is `None`, every list is empty), i.e.
/// the same value `RecordReferences::empty()` returns.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct RecordReferences {
    /// The subject actor DID.
    ///
    /// Set for records that have a direct subject_actor_id FK (Follow, Block,
    /// ListItem, Verification). It is also set for Like and Repost, where it is
    /// the author DID taken from the subject post URI: those records have no FK
    /// in the DB, but the DID is needed for notifications.
    pub subject_did: Option<String>,

    /// Additional actor DIDs that need to exist (but don't have FK relationships in the record table).
    ///
    /// Includes: the list owner for ListBlock, list owners from Threadgate list
    /// rules, and the service DID of a FeedGenerator.
    pub additional_dids: Vec<String>,

    /// Post-specific reference (for notification creation):
    /// parent post author DID (from reply.parent).
    pub parent_author_did: Option<String>,

    /// Root post author DID (from reply.root).
    pub root_author_did: Option<String>,

    /// Quoted post author DID (from embed.record or embed.recordWithMedia).
    pub quoted_author_did: Option<String>,

    /// Mentioned user DIDs (from facets).
    pub mentioned_dids: Vec<String>,

    /// Via repost URI (for likes and reposts that came via a repost).
    /// This is the full AT-URI of the repost, e.g. "at://did:plc:xxx/app.bsky.feed.repost/3abc..."
    pub via_uri: Option<String>,

    /// Via repost CID (from the StrongRef in the via field).
    /// This is the actual CID of the repost, needed to create the stub with the correct CID.
    pub via_cid: Option<String>,
}
44
45impl RecordReferences {
46 /// Create empty references
47 pub fn empty() -> Self {
48 Self {
49 subject_did: None,
50 additional_dids: Vec::new(),
51 parent_author_did: None,
52 root_author_did: None,
53 quoted_author_did: None,
54 mentioned_dids: Vec::new(),
55 via_uri: None,
56 via_cid: None,
57 }
58 }
59
60 /// Create references with just a subject DID
61 pub fn with_subject(subject_did: String) -> Self {
62 Self {
63 subject_did: Some(subject_did),
64 additional_dids: Vec::new(),
65 parent_author_did: None,
66 root_author_did: None,
67 quoted_author_did: None,
68 mentioned_dids: Vec::new(),
69 via_uri: None,
70 via_cid: None,
71 }
72 }
73
74 /// Create references with additional DIDs only
75 pub fn with_additional(dids: Vec<String>) -> Self {
76 Self {
77 subject_did: None,
78 additional_dids: dids,
79 parent_author_did: None,
80 root_author_did: None,
81 quoted_author_did: None,
82 mentioned_dids: Vec::new(),
83 via_uri: None,
84 via_cid: None,
85 }
86 }
87
88 /// Check if there are any references to resolve
89 pub fn has_references(&self) -> bool {
90 self.subject_did.is_some()
91 || !self.additional_dids.is_empty()
92 || self.parent_author_did.is_some()
93 || self.root_author_did.is_some()
94 || self.quoted_author_did.is_some()
95 || !self.mentioned_dids.is_empty()
96 }
97}
98
99/// Extract all actor references from a record
100///
101/// This identifies which actors need to exist in the database before the record can be processed.
102/// The subject_did is returned for records that have a foreign key relationship (Follow, Block, etc.)
103/// Additional DIDs are returned for records that reference actors without direct FKs.
104pub fn extract_references(record: &RecordTypes) -> RecordReferences {
105 match record {
106 // Follow: subject is the followed actor
107 RecordTypes::AppBskyGraphFollow(rec) => {
108 RecordReferences::with_subject(rec.subject.clone())
109 }
110
111 // Block: subject is the blocked actor
112 RecordTypes::AppBskyGraphBlock(rec) => {
113 RecordReferences::with_subject(rec.subject.clone())
114 }
115
116 // ListItem: subject is the actor being added to the list
117 RecordTypes::AppBskyGraphListItem(rec) => {
118 RecordReferences::with_subject(rec.subject.clone())
119 }
120
121 // Verification: subject is the verified actor
122 RecordTypes::AppBskyGraphVerification(rec) => {
123 RecordReferences::with_subject(rec.subject.to_string())
124 }
125
126 // Like: extract author DID from liked post URI and via repost URI
127 // NOTE: Likes don't have subject_actor_id FK in DB, but we need it for notifications
128 RecordTypes::AppBskyFeedLike(rec) => {
129 let mut refs = if let Some(subject_did) = parakeet_db::utils::at_uri::extract_did(rec.subject.uri.as_str()) {
130 RecordReferences::with_subject(subject_did.to_string())
131 } else {
132 RecordReferences::empty()
133 };
134
135 // Extract via repost URI and CID (full StrongRef)
136 if let Some(via_ref) = &rec.via {
137 refs.via_uri = Some(via_ref.uri.to_string());
138 refs.via_cid = Some(via_ref.cid.to_string());
139 }
140
141 refs
142 }
143
144 // Repost: extract author DID from reposted post URI and via repost URI
145 // NOTE: Reposts don't have subject_actor_id FK in DB, but we need it for notifications
146 RecordTypes::AppBskyFeedRepost(rec) => {
147 let mut refs = if let Some(subject_did) = parakeet_db::utils::at_uri::extract_did(rec.subject.uri.as_str()) {
148 RecordReferences::with_subject(subject_did.to_string())
149 } else {
150 RecordReferences::empty()
151 };
152
153 // Extract via repost URI and CID (full StrongRef)
154 if let Some(via_ref) = &rec.via {
155 refs.via_uri = Some(via_ref.uri.to_string());
156 refs.via_cid = Some(via_ref.cid.to_string());
157 }
158
159 refs
160 }
161
162 // Post: extract DIDs from reply parents, quote embeds, and mentions
163 RecordTypes::AppBskyFeedPost(rec) => {
164 let mut refs = RecordReferences::empty();
165
166 // Extract reply parent and root authors (for notifications)
167 if let Some(reply) = &rec.reply {
168 refs.parent_author_did = parakeet_db::utils::at_uri::extract_did(&reply.parent.uri)
169 .map(|s| s.to_string());
170 refs.root_author_did = parakeet_db::utils::at_uri::extract_did(&reply.root.uri)
171 .map(|s| s.to_string());
172 }
173
174 // Extract quote embed author (for notifications)
175 if let Some(embed) = &rec.embed {
176 if let Some(bsky_embed) = embed.as_bsky() {
177 match bsky_embed {
178 AppBskyEmbed::Record(record_embed) => {
179 refs.quoted_author_did =
180 parakeet_db::utils::at_uri::extract_did(&record_embed.record.uri)
181 .map(|s| s.to_string());
182 }
183 AppBskyEmbed::RecordWithMedia(rwm) => {
184 refs.quoted_author_did =
185 parakeet_db::utils::at_uri::extract_did(&rwm.record.uri)
186 .map(|s| s.to_string());
187 }
188 _ => {}
189 }
190 }
191 }
192
193 // Extract mentioned users from facets (for notifications)
194 if let Some(facets) = &rec.facets {
195 refs.mentioned_dids = crate::utils::extract_mentions_and_tags(facets).0;
196 }
197
198 refs
199 }
200
201 // ListBlock: extract list owner DID
202 RecordTypes::AppBskyGraphListBlock(rec) => {
203 let mut dids = Vec::new();
204 if let Some(list_did) = parakeet_db::utils::at_uri::extract_did(rec.subject.uri.as_str()) {
205 dids.push(list_did.to_string());
206 }
207 RecordReferences::with_additional(dids)
208 }
209
210 // Threadgate: extract list owner DIDs from list rules
211 RecordTypes::AppBskyFeedThreadgate(rec) => {
212 let mut dids = Vec::new();
213 if let Some(rules) = &rec.allow {
214 for rule in rules {
215 if let ThreadgateRule::List { list } = rule {
216 if let Some(list_did) = parakeet_db::utils::at_uri::extract_did(list) {
217 dids.push(list_did.to_string());
218 }
219 }
220 }
221 }
222 RecordReferences::with_additional(dids)
223 }
224
225 // FeedGenerator: extract service DID (the actor hosting the feed generator)
226 RecordTypes::AppBskyFeedGenerator(rec) => {
227 // Only include the service DID if it's non-empty after trimming
228 let trimmed = rec.did.trim();
229 if trimmed.is_empty() {
230 tracing::warn!(
231 record_did = %rec.did,
232 record_did_len = rec.did.len(),
233 "FeedGenerator has empty service DID - skipping reference extraction"
234 );
235 RecordReferences::empty()
236 } else {
237 RecordReferences::with_additional(vec![rec.did.clone()])
238 }
239 }
240
241 // Other record types don't have actor references
242 _ => RecordReferences::empty(),
243 }
244}
245
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::records::{
        AppBskyActorProfile, AppBskyFeedLike, AppBskyGraphBlock, AppBskyGraphFollow,
    };
    use lexica::StrongRef;

    /// A follow surfaces the followed actor as the subject and nothing else.
    #[test]
    fn test_follow_extracts_subject() {
        let follow = AppBskyGraphFollow {
            subject: "did:plc:test123".to_string(),
            created_at: chrono::Utc::now(),
        };

        let extracted = extract_references(&RecordTypes::AppBskyGraphFollow(follow));

        assert_eq!(extracted.subject_did.as_deref(), Some("did:plc:test123"));
        assert!(extracted.additional_dids.is_empty());
        assert!(extracted.has_references());
    }

    /// A block surfaces the blocked actor as the subject and nothing else.
    #[test]
    fn test_block_extracts_subject() {
        let block = AppBskyGraphBlock {
            subject: "did:plc:blocked".to_string(),
            created_at: chrono::Utc::now(),
        };

        let extracted = extract_references(&RecordTypes::AppBskyGraphBlock(block));

        assert_eq!(extracted.subject_did.as_deref(), Some("did:plc:blocked"));
        assert!(extracted.additional_dids.is_empty());
    }

    /// A like surfaces the liked post's author, taken from the subject URI.
    #[test]
    fn test_like_extracts_post_author() {
        use ipld_core::cid::Cid;
        use std::str::FromStr;

        let post_cid =
            Cid::from_str("bafyreihxj5lhuip5iynyzqj6e4w2dzosfndbcvtqgwdfvpmzw4pj4v76fi").unwrap();
        let like = AppBskyFeedLike {
            subject: StrongRef {
                uri: "at://did:plc:author/app.bsky.feed.post/abc123".to_string(),
                cid: post_cid,
            },
            created_at: chrono::Utc::now(),
            via: None,
        };

        let extracted = extract_references(&RecordTypes::AppBskyFeedLike(like));

        // Likes use subject_did for notification creation (even though DB has no FK)
        assert_eq!(extracted.subject_did.as_deref(), Some("did:plc:author"));
        assert!(extracted.additional_dids.is_empty());
    }

    /// A profile record references no other actor at all.
    #[test]
    fn test_profile_has_no_references() {
        let profile = AppBskyActorProfile {
            display_name: Some("Test User".to_string()),
            description: None,
            avatar: None,
            banner: None,
            labels: None,
            created_at: None,
            joined_via_starter_pack: None,
            pinned_post: None,
            pronouns: None,
            website: None,
        };

        let extracted = extract_references(&RecordTypes::AppBskyActorProfile(profile));

        assert!(extracted.subject_did.is_none());
        assert!(extracted.additional_dids.is_empty());
        assert!(!extracted.has_references());
    }
}