// Sifa professional network API (Fastify, AT Protocol, Jetstream) - https://sifa.id/
// Listing metadata from the repository viewer: at main, 200 lines, 5.5 kB.
import { logger } from '../logger.js';

/** Upper bound, in milliseconds, applied to every outbound request made by this module. */
const FETCH_TIMEOUT = 10000;

/** `User-Agent` header sent with every outbound request. */
const USER_AGENT = 'Sifa/1.0 (+https://sifa.id)';

/** Maximum number of entries taken from a single feed document. */
const MAX_FEED_ITEMS = 20;

/** Excerpts longer than this many UTF-16 code units are cut and suffixed with `...`. */
const MAX_EXCERPT_LENGTH = 200;

/** The five entities predefined by XML, keyed by lower-cased name. */
const NAMED_XML_ENTITIES: ReadonlyMap<string, string> = new Map([
  ['amp', '&'],
  ['lt', '<'],
  ['gt', '>'],
  ['quot', '"'],
  ['apos', "'"],
]);

/**
 * Parses `raw` and returns the URL only when its scheme is `http:` or `https:`.
 *
 * Used as a guard before fetching: the platform `fetch` can also load other
 * schemes (e.g. `data:`), which are never a legitimate feed location.
 * NOTE(review): this does not block private/loopback hosts; if these URLs are
 * user-supplied, SSRF filtering has to happen elsewhere - confirm.
 */
function parseHttpUrl(raw: string): URL | null {
  try {
    const parsed = new URL(raw);
    return parsed.protocol === 'http:' || parsed.protocol === 'https:' ? parsed : null;
  } catch {
    return null;
  }
}

/** Issues a GET with the module-wide timeout and `User-Agent`. */
function fetchWithDefaults(url: string): Promise<Response> {
  return fetch(url, {
    signal: AbortSignal.timeout(FETCH_TIMEOUT),
    headers: { 'User-Agent': USER_AGENT },
  });
}

/** Cancels a response body that will not be read so the stream is released promptly. */
function discardBody(response: Response): void {
  response.body?.cancel().catch(() => undefined);
}

/**
 * Decodes the predefined XML entities and numeric character references in
 * a single pass (so `&amp;lt;` becomes `&lt;`, not `<`). Unknown entities are
 * left untouched.
 */
function decodeXmlEntities(text: string): string {
  return text.replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (entity: string, body: string) => {
    if (!body.startsWith('#')) {
      return NAMED_XML_ENTITIES.get(body.toLowerCase()) ?? entity;
    }
    const isHex = body[1] === 'x' || body[1] === 'X';
    const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    return Number.isInteger(codePoint) && codePoint <= 0x10ffff
      ? String.fromCodePoint(codePoint)
      : entity;
  });
}

/** Builds the YouTube uploads feed URL for a channel ID. */
function youtubeFeedUrl(channelId: string): string {
  return `https://www.youtube.com/feeds/videos.xml?channel_id=${channelId}`;
}

/**
 * Resolves the feed URL for a profile link.
 *
 * @param platform - One of `youtube`, `fediverse`, `rss`, `website`; any other
 *   value yields `null`.
 * @param url - The link to inspect. For `rss` it is returned unchanged.
 * @returns The feed URL, or `null` when none could be determined. Never rejects.
 */
export async function discoverFeedUrl(platform: string, url: string): Promise<string | null> {
  try {
    switch (platform) {
      case 'youtube':
        // `await` inside the try so that a rejection is handled by the catch below.
        return await discoverYoutubeFeed(url);
      case 'fediverse':
        return discoverFediverseFeed(url);
      case 'rss':
        return url;
      case 'website':
        return await discoverRssFeed(url);
      default:
        return null;
    }
  } catch (err) {
    logger.warn({ err, platform, url }, 'Feed discovery failed');
    return null;
  }
}

/**
 * Derives the uploads feed for a YouTube URL.
 *
 * `/channel/UC...` URLs are answered without a network request; for any other
 * path on a YouTube host the page is fetched and scanned for `channel_id=`.
 *
 * @returns The feed URL, or `null` for non-YouTube hosts, non-HTTP(S) URLs,
 *   failed requests, or pages without a channel ID.
 */
async function discoverYoutubeFeed(url: string): Promise<string | null> {
  try {
    const parsed = parseHttpUrl(url);
    if (!parsed) return null;

    const host = parsed.hostname;
    const isYoutubeHost =
      host === 'youtube.com' ||
      host.endsWith('.youtube.com') ||
      host === 'youtu.be' ||
      host.endsWith('.youtu.be');
    if (!isYoutubeHost) return null;

    // Direct channel ID URL: /channel/UC...
    const channelMatch = parsed.pathname.match(/\/channel\/(UC[\w-]+)/);
    if (channelMatch?.[1]) return youtubeFeedUrl(channelMatch[1]);

    // For /@handle or /c/name URLs, fetch the page to extract the channel ID.
    const response = await fetchWithDefaults(url);
    if (!response.ok) {
      discardBody(response);
      return null;
    }

    const html = await response.text();
    const idMatch = html.match(/channel_id=([A-Za-z0-9_-]+)/);
    return idMatch?.[1] ? youtubeFeedUrl(idMatch[1]) : null;
  } catch {
    // Best effort: network and parse failures mean "no feed found".
    return null;
  }
}

/**
 * Builds `<origin>/@user.rss` from the first `@`-prefixed path segment of a
 * profile URL. No network request is made.
 *
 * @returns The feed URL, or `null` when the URL is not HTTP(S) or has no
 *   `@name` segment.
 */
function discoverFediverseFeed(url: string): string | null {
  // Non-HTTP(S) URLs have an origin of "null", which would yield "null/@user.rss".
  const parsed = parseHttpUrl(url);
  if (!parsed) return null;

  const handle = parsed.pathname
    .split('/')
    .find((segment) => segment.startsWith('@') && segment.length > 1);
  return handle ? `${parsed.origin}/${handle}.rss` : null;
}

/**
 * Finds the feed behind a website URL.
 *
 * If the response itself is served as a feed, `url` is returned. Otherwise the
 * HTML is searched for the first `<link type="application/rss+xml|atom+xml">`
 * and its `href` is resolved against the final (post-redirect) page URL.
 *
 * @returns An absolute HTTP(S) feed URL, or `null` when none is found or the
 *   request fails.
 */
async function discoverRssFeed(url: string): Promise<string | null> {
  try {
    if (!parseHttpUrl(url)) return null;

    const response = await fetchWithDefaults(url);
    if (!response.ok) {
      discardBody(response);
      return null;
    }

    const contentType = (response.headers.get('content-type') ?? '').toLowerCase();
    // `application/xhtml+xml` contains "xml" but is an HTML page, not a feed.
    const servedAsFeed =
      !contentType.includes('html') &&
      (contentType.includes('xml') || contentType.includes('rss') || contentType.includes('atom'));
    if (servedAsFeed) {
      discardBody(response);
      return url;
    }

    const html = await response.text();
    const linkTag = html.match(/<link[^>]+type=["']application\/(rss|atom)\+xml["'][^>]*>/i)?.[0];
    if (!linkTag) return null;

    const rawHref = linkTag.match(/href=["']([^"']+)["']/i)?.[1];
    if (!rawHref) return null;

    // Attribute values are entity-encoded (`&amp;` in query strings); relative
    // links are relative to the document that was actually served.
    const base = response.url || url;
    const resolved = new URL(decodeXmlEntities(rawHref), base);
    return resolved.protocol === 'http:' || resolved.protocol === 'https:'
      ? resolved.toString()
      : null;
  } catch {
    // Best effort: network and URL failures mean "no feed found".
    return null;
  }
}

/** One entry extracted from an RSS `<item>` or Atom `<entry>`. */
export interface FeedItem {
  /** Text of the entry's `<title>`, or `''` when absent. */
  title: string;
  /** Tag-stripped description/summary/content, cut to 200 characters plus `...`. */
  excerpt: string;
  /** Entry link, or `''` when the entry has none. */
  url: string;
  /** Publication date as an ISO 8601 string, or `''` when missing or unparseable. */
  timestamp: string;
  /** The `source` value passed to {@link fetchFeedItems}, copied verbatim. */
  source: string;
}

/**
 * Downloads a feed and returns up to 20 parsed entries.
 *
 * @param feedUrl - Absolute HTTP(S) URL of an RSS or Atom document.
 * @param source - Label copied onto every returned item.
 * @returns The parsed items; an empty array on a non-HTTP(S) URL, a non-2xx
 *   response, or any network/parse failure. Never rejects.
 */
export async function fetchFeedItems(feedUrl: string, source: string): Promise<FeedItem[]> {
  if (!parseHttpUrl(feedUrl)) {
    logger.warn({ feedUrl }, 'Refusing to fetch feed from non-HTTP(S) URL');
    return [];
  }

  try {
    const response = await fetchWithDefaults(feedUrl);
    if (!response.ok) {
      discardBody(response);
      return [];
    }
    return parseRssFeed(await response.text(), source);
  } catch (err) {
    logger.warn({ err, feedUrl }, 'Failed to fetch feed');
    return [];
  }
}

/** Returns the content of the first of `tags` present in `xml` (see {@link extractTag}). */
function firstTag(xml: string, tags: readonly string[]): string {
  for (const tag of tags) {
    const value = extractTag(xml, tag);
    if (value !== null) return value;
  }
  return '';
}

/**
 * Removes markup from `html`. The replacement is repeated until the text stops
 * changing so that fragments re-joined by one pass (`<<b>i>`) are removed too.
 */
function stripTags(html: string): string {
  let current = html;
  let previous: string;
  do {
    previous = current;
    current = current.replace(/<[^>]+>/g, '');
  } while (current !== previous);
  return current.trim();
}

/** Converts a feed date to ISO 8601; returns `''` for empty or unparseable input. */
function toIsoTimestamp(raw: string): string {
  if (!raw) return '';
  const time = Date.parse(raw);
  // `toISOString()` throws on an invalid date; one bad entry must not sink the feed.
  return Number.isNaN(time) ? '' : new Date(time).toISOString();
}

/**
 * Extracts entries from an RSS or Atom document using regular expressions.
 *
 * RSS `<item>` elements take precedence; Atom `<entry>` elements are used only
 * when no `<item>` is present. Entries with neither a title nor a link are
 * skipped, and at most 20 entries are read.
 */
function parseRssFeed(xml: string, source: string): FeedItem[] {
  const entries =
    xml.match(/<item[\s>][\s\S]*?<\/item>/gi) ?? xml.match(/<entry[\s>][\s\S]*?<\/entry>/gi) ?? [];

  const items: FeedItem[] = [];
  for (const entryXml of entries.slice(0, MAX_FEED_ITEMS)) {
    // `extractTag` already unwraps CDATA and trims.
    const title = extractTag(entryXml, 'title') ?? '';
    const link = extractLink(entryXml);
    if (!title && !link) continue;

    const text = stripTags(firstTag(entryXml, ['description', 'summary', 'content']));
    const excerpt =
      text.length > MAX_EXCERPT_LENGTH ? text.slice(0, MAX_EXCERPT_LENGTH) + '...' : text;

    items.push({
      title,
      excerpt,
      url: link ?? '',
      timestamp: toIsoTimestamp(firstTag(entryXml, ['pubDate', 'published', 'updated'])),
      source,
    });
  }
  return items;
}

/**
 * Returns the trimmed inner text of the first `<tag>...</tag>` in `xml`, with
 * CDATA wrappers removed.
 *
 * The tag name must be followed by whitespace or `>`, so `content` does not
 * match `<content:encoded>`.
 *
 * @param tag - A literal element name; it is interpolated into a regular
 *   expression and must not contain regex metacharacters.
 * @returns The text, or `null` when the element is missing or empty.
 */
function extractTag(xml: string, tag: string): string | null {
  const pattern = new RegExp(`<${tag}(?:\\s[^>]*)?>([\\s\\S]*?)<\\/${tag}\\s*>`, 'i');
  const inner = xml.match(pattern)?.[1];
  if (!inner) return null;
  return inner.replace(/<!\[CDATA\[([\s\S]*?)\]\]>/g, '$1').trim();
}

/**
 * Returns the entry's link.
 *
 * Atom: among `<link href="...">` elements, the first with `rel="alternate"`
 * or without `rel` wins (an entry may also carry `self`, `edit`, `replies`
 * links); if none qualifies, the first `href` is used. RSS: falls back to the
 * text of `<link>...</link>`. XML entities in the URL are decoded.
 */
function extractLink(xml: string): string | null {
  let firstHref: string | null = null;

  for (const tagMatch of xml.matchAll(/<link\s[^>]*>/gi)) {
    const tag = tagMatch[0];
    const href = tag.match(/\shref\s*=\s*["']([^"']+)["']/i)?.[1];
    if (!href) continue;

    const rel = tag.match(/\srel\s*=\s*["']([^"']*)["']/i)?.[1]?.trim().toLowerCase();
    if (!rel || rel.split(/\s+/).includes('alternate')) {
      return decodeXmlEntities(href);
    }
    if (firstHref === null) firstHref = href;
  }

  if (firstHref !== null) return decodeXmlEntities(firstHref);

  const content = extractTag(xml, 'link');
  return content === null ? null : decodeXmlEntities(content);
}