a tool for shared writing and social publishing
1/**
2 * Utilities for deduplicating records that may exist under both
3 * pub.leaflet.* and site.standard.* namespaces.
4 *
5 * After the migration to site.standard.*, records can exist in both namespaces
6 * with the same DID and rkey. This utility deduplicates them, preferring
7 * site.standard.* records when available.
8 */
9
10import { AtUri } from "@atproto/syntax";
11
/**
 * Builds a namespace-independent identity key for a record from its AT URI.
 *
 * The key is the URI's authority (`host`) joined to its record key (`rkey`)
 * with a `/`. The collection segment is deliberately left out, so the same
 * record stored under two different collections yields the same key.
 *
 * @param uri - AT URI string to parse
 * @returns `"<host>/<rkey>"`, or `null` when `AtUri` rejects the input
 *
 * @example
 * getRecordIdentityKey("at://did:plc:abc/pub.leaflet.document/3abc")
 * // Returns: "did:plc:abc/3abc"
 *
 * getRecordIdentityKey("at://did:plc:abc/site.standard.document/3abc")
 * // Returns: "did:plc:abc/3abc" (same key, different namespace)
 */
function getRecordIdentityKey(uri: string): string | null {
  try {
    const atUri = new AtUri(uri);
    const authority = atUri.host;
    const recordKey = atUri.rkey;
    return authority + "/" + recordKey;
  } catch {
    // Any failure while parsing means the input has no usable identity key.
    return null;
  }
}
31
/**
 * Checks whether an AT URI's collection belongs to the site.standard namespace.
 *
 * Only the collection segment (the first path segment after the authority) is
 * inspected. A plain substring search would also match a record key that
 * happens to start with `site.standard.`, misclassifying e.g.
 * `at://did:plc:abc/pub.leaflet.document/site.standard.x`.
 *
 * @param uri - AT URI string, with or without the leading `at://` scheme
 * @returns `true` when the collection segment starts with `site.standard.`
 */
function isSiteStandardUri(uri: string): boolean {
  // Drop the optional scheme so the authority is always the first segment.
  const withoutScheme = uri.replace(/^at:\/\//i, "");
  // Segment 0 is the authority, segment 1 (if present) is the collection.
  const [, collection = ""] = withoutScheme.split("/");
  return collection.startsWith("site.standard.");
}
38
/**
 * Deduplicates an array of records that have a `uri` property.
 *
 * Records whose URIs produce the same identity key (see
 * `getRecordIdentityKey`) are treated as the same record. When such a group
 * contains both a site.standard record and a non-site.standard one, the
 * site.standard record wins; otherwise the first one seen is kept.
 *
 * The result is in first-occurrence order: a winning site.standard record
 * takes the position of the earlier duplicate it replaces. Records whose URI
 * cannot be parsed have no identity key, so they are never merged with
 * anything and are returned unchanged in their original position.
 *
 * @param records - Array of records with a `uri` property (not mutated)
 * @returns Deduplicated array, preferring site.standard records
 *
 * @example
 * const docs = [
 *   { uri: "at://did:plc:abc/pub.leaflet.document/3abc", data: {...} },
 *   { uri: "at://did:plc:abc/site.standard.document/3abc", data: {...} },
 *   { uri: "at://did:plc:abc/pub.leaflet.document/3def", data: {...} },
 * ];
 * const deduped = deduplicateByUri(docs);
 * // Returns: [
 * //   { uri: "at://did:plc:abc/site.standard.document/3abc", data: {...} },
 * //   { uri: "at://did:plc:abc/pub.leaflet.document/3def", data: {...} },
 * // ]
 */
export function deduplicateByUri<T extends { uri: string }>(records: T[]): T[] {
  // Each output position is a mutable slot so a later site.standard duplicate
  // can replace the record without changing its position.
  const slots: Array<{ record: T }> = [];
  const slotByKey = new Map<string, { record: T }>();

  for (const record of records) {
    const key = getRecordIdentityKey(record.uri);
    if (key === null) {
      // Invalid URI: nothing to deduplicate against, keep the record as-is.
      slots.push({ record });
      continue;
    }

    const slot = slotByKey.get(key);
    if (slot === undefined) {
      const created = { record };
      slotByKey.set(key, created);
      slots.push(created);
      continue;
    }

    // Prefer site.standard records over others. If both are in the same
    // namespace, or the slot already holds site.standard, keep what is there.
    if (isSiteStandardUri(record.uri) && !isSiteStandardUri(slot.record.uri)) {
      slot.record = record;
    }
  }

  return slots.map((slot) => slot.record);
}
84
/**
 * Deduplicates records while preserving the original order, based on the
 * first occurrence of each unique record.
 *
 * Records whose URIs produce the same identity key (see
 * `getRecordIdentityKey`) are collapsed into one. A site.standard record
 * replaces an earlier non-site.standard duplicate but inherits that
 * duplicate's position. Records whose URI cannot be parsed have no identity
 * key; they are never merged and stay at their original position.
 *
 * @param records - Array of records with a `uri` property (not mutated)
 * @returns Deduplicated array in original order, preferring site.standard records
 */
export function deduplicateByUriOrdered<T extends { uri: string }>(
  records: T[]
): T[] {
  type Entry = { record: T; index: number };

  const entriesByKey = new Map<string, Entry>();
  // Records with unparsable URIs, tracked separately because they have no key.
  const unkeyedEntries: Entry[] = [];

  records.forEach((record, index) => {
    const key = getRecordIdentityKey(record.uri);
    if (key === null) {
      unkeyedEntries.push({ record, index });
      return;
    }

    const existing = entriesByKey.get(key);
    if (existing === undefined) {
      entriesByKey.set(key, { record, index });
      return;
    }

    // Prefer site.standard records over others; the entry keeps the index of
    // the first occurrence so the replacement does not move.
    if (
      isSiteStandardUri(record.uri) &&
      !isSiteStandardUri(existing.record.uri)
    ) {
      existing.record = record;
    }
  });

  // Indices are unique per entry, so sorting restores the input order exactly.
  return [...entriesByKey.values(), ...unkeyedEntries]
    .sort((a, b) => a.index - b.index)
    .map((entry) => entry.record);
}