a tool for shared writing and social publishing
/**
 * Utilities for deduplicating records that may exist under both
 * pub.leaflet.* and site.standard.* namespaces.
 *
 * After the migration to site.standard.*, records can exist in both namespaces
 * with the same DID and rkey. This utility deduplicates them, preferring
 * site.standard.* records when available.
 */

import { AtUri } from "@atproto/syntax";

/**
 * Extracts the identity key (authority + rkey) from an AT URI.
 * The collection segment is deliberately left out, so the same record
 * published under two namespaces maps to the same key.
 *
 * @param uri - AT URI of a record
 * @returns `"<host>/<rkey>"`, or `null` when the URI cannot be parsed or
 *   carries no record key
 *
 * @example
 * getRecordIdentityKey("at://did:plc:abc/pub.leaflet.document/3abc")
 * // Returns: "did:plc:abc/3abc"
 *
 * getRecordIdentityKey("at://did:plc:abc/site.standard.document/3abc")
 * // Returns: "did:plc:abc/3abc" (same key, different namespace)
 */
function getRecordIdentityKey(uri: string): string | null {
  try {
    const parsed = new AtUri(uri);
    // Without a record key the URI does not identify a single record; returning
    // a key here would make every such URI from one repo collide on "<host>/".
    if (!parsed.rkey) {
      return null;
    }
    return `${parsed.host}/${parsed.rkey}`;
  } catch {
    // AtUri throws on malformed input; report "no identity" instead.
    return null;
  }
}

/**
 * Checks if a URI is from the site.standard namespace.
 *
 * @param uri - AT URI to inspect
 * @returns `true` when the URI contains a `/site.standard.` collection segment
 */
function isSiteStandardUri(uri: string): boolean {
  return uri.includes("/site.standard.");
}

/**
 * Shared implementation behind both exported deduplication functions.
 *
 * Walks the records once. The first record seen for an identity key claims a
 * slot in the output; a later record with the same key replaces the slot's
 * content only when it is a site.standard record and the current occupant is
 * not. Records whose URI yields no identity key are never merged with
 * anything and are passed through in place.
 *
 * @param records - Records to deduplicate; the array is not mutated
 * @returns New array in first-occurrence order
 */
function dedupePreferringSiteStandard<T extends { uri: string }>(
  records: readonly T[],
): T[] {
  // Each slot is a mutable box so a winner can be swapped in without
  // disturbing the slot's position in the output.
  const slots: Array<{ record: T }> = [];
  const slotByKey = new Map<string, { record: T }>();

  for (const record of records) {
    const key = getRecordIdentityKey(record.uri);
    if (key === null) {
      // Invalid URI, keep the record as-is
      slots.push({ record });
      continue;
    }

    const slot = slotByKey.get(key);
    if (slot === undefined) {
      const fresh = { record };
      slotByKey.set(key, fresh);
      slots.push(fresh);
      continue;
    }

    // Prefer site.standard records over pub.leaflet records. If both are in
    // the same namespace, or the slot already holds site.standard, keep it.
    if (isSiteStandardUri(record.uri) && !isSiteStandardUri(slot.record.uri)) {
      slot.record = record;
    }
  }

  return slots.map((slot) => slot.record);
}

/**
 * Deduplicates an array of records that have a `uri` property.
 *
 * When records exist under both pub.leaflet.* and site.standard.* namespaces
 * (same DID and rkey), this function keeps only the site.standard version.
 * Records whose URI cannot be parsed are returned unchanged rather than
 * dropped.
 *
 * @param records - Array of records with a `uri` property
 * @returns Deduplicated array, preferring site.standard records
 *
 * @example
 * const docs = [
 *   { uri: "at://did:plc:abc/pub.leaflet.document/3abc", data: {...} },
 *   { uri: "at://did:plc:abc/site.standard.document/3abc", data: {...} },
 *   { uri: "at://did:plc:abc/pub.leaflet.document/3def", data: {...} },
 * ];
 * const deduped = deduplicateByUri(docs);
 * // Returns: [
 * //   { uri: "at://did:plc:abc/site.standard.document/3abc", data: {...} },
 * //   { uri: "at://did:plc:abc/pub.leaflet.document/3def", data: {...} },
 * // ]
 */
export function deduplicateByUri<T extends { uri: string }>(records: T[]): T[] {
  return dedupePreferringSiteStandard(records);
}

/**
 * Deduplicates records while preserving the original order based on the first
 * occurrence of each unique record.
 *
 * Same deduplication logic as deduplicateByUri: when a site.standard record
 * replaces an earlier pub.leaflet record, it takes over the earlier record's
 * position. Records whose URI cannot be parsed are kept in place.
 *
 * @param records - Array of records with a `uri` property
 * @returns Deduplicated array in original order, preferring site.standard records
 */
export function deduplicateByUriOrdered<T extends { uri: string }>(
  records: T[]
): T[] {
  return dedupePreferringSiteStandard(records);
}