// Sifa professional network API (Fastify, AT Protocol, Jetstream)
// https://sifa.id/
1import { logger } from '../logger.js';
2
// Timeout for every outbound HTTP fetch in this module, in milliseconds.
const FETCH_TIMEOUT = 10000;
4
5export async function discoverFeedUrl(platform: string, url: string): Promise<string | null> {
6 try {
7 if (platform === 'youtube') {
8 return discoverYoutubeFeed(url);
9 }
10 if (platform === 'fediverse') {
11 return discoverFediverseFeed(url);
12 }
13 if (platform === 'rss') {
14 return url;
15 }
16 if (platform === 'website') {
17 return discoverRssFeed(url);
18 }
19 return null;
20 } catch (err) {
21 logger.warn({ err, platform, url }, 'Feed discovery failed');
22 return null;
23 }
24}
25
26async function discoverYoutubeFeed(url: string): Promise<string | null> {
27 try {
28 const parsed = new URL(url);
29 if (
30 !parsed.hostname.endsWith('.youtube.com') &&
31 parsed.hostname !== 'youtube.com' &&
32 !parsed.hostname.endsWith('.youtu.be') &&
33 parsed.hostname !== 'youtu.be'
34 ) {
35 return null;
36 }
37
38 // Direct channel ID URL: /channel/UC...
39 const channelMatch = parsed.pathname.match(/\/channel\/(UC[\w-]+)/);
40 if (channelMatch?.[1]) {
41 return `https://www.youtube.com/feeds/videos.xml?channel_id=${channelMatch[1]}`;
42 }
43
44 // For /@handle or /c/name URLs, fetch the page to extract the channel ID
45 const response = await fetch(url, {
46 signal: AbortSignal.timeout(FETCH_TIMEOUT),
47 headers: { 'User-Agent': 'Sifa/1.0 (+https://sifa.id)' },
48 });
49 if (!response.ok) return null;
50
51 const html = await response.text();
52 const idMatch = html.match(/channel_id=([A-Za-z0-9_-]+)/);
53 if (idMatch?.[1]) {
54 return `https://www.youtube.com/feeds/videos.xml?channel_id=${idMatch[1]}`;
55 }
56
57 return null;
58 } catch {
59 return null;
60 }
61}
62
63function discoverFediverseFeed(url: string): string | null {
64 try {
65 const parsed = new URL(url);
66 const pathParts = parsed.pathname.split('/').filter(Boolean);
67 const username = pathParts.find((p) => p.startsWith('@'));
68 if (username) {
69 return `${parsed.origin}/${username}.rss`;
70 }
71 return null;
72 } catch {
73 return null;
74 }
75}
76
77async function discoverRssFeed(url: string): Promise<string | null> {
78 try {
79 const response = await fetch(url, {
80 signal: AbortSignal.timeout(FETCH_TIMEOUT),
81 headers: { 'User-Agent': 'Sifa/1.0 (+https://sifa.id)' },
82 });
83
84 if (!response.ok) return null;
85
86 const contentType = response.headers.get('content-type') ?? '';
87 if (
88 contentType.includes('xml') ||
89 contentType.includes('rss') ||
90 contentType.includes('atom')
91 ) {
92 return url;
93 }
94
95 const html = await response.text();
96 const linkMatch = html.match(/<link[^>]+type=["']application\/(rss|atom)\+xml["'][^>]*>/i);
97 if (!linkMatch) return null;
98
99 const hrefMatch = linkMatch[0].match(/href=["']([^"']+)["']/i);
100 if (!hrefMatch) return null;
101
102 const feedHref = hrefMatch[1];
103 if (!feedHref) return null;
104
105 try {
106 return new URL(feedHref, url).toString();
107 } catch {
108 return null;
109 }
110 } catch {
111 return null;
112 }
113}
114
/** A single normalized entry extracted from an RSS or Atom feed. */
export interface FeedItem {
  /** Item title with CDATA wrappers stripped; may be empty. */
  title: string;
  /** Plain-text description, tags removed, truncated to ~200 characters. */
  excerpt: string;
  /** Link to the item; empty string when the feed provided none. */
  url: string;
  /** Publication time as an ISO-8601 string, or empty when unavailable. */
  timestamp: string;
  /** Caller-supplied label identifying which feed this item came from. */
  source: string;
}
122
123export async function fetchFeedItems(feedUrl: string, source: string): Promise<FeedItem[]> {
124 try {
125 const response = await fetch(feedUrl, {
126 signal: AbortSignal.timeout(FETCH_TIMEOUT),
127 headers: { 'User-Agent': 'Sifa/1.0 (+https://sifa.id)' },
128 });
129
130 if (!response.ok) return [];
131
132 const text = await response.text();
133 return parseRssFeed(text, source);
134 } catch (err) {
135 logger.warn({ err, feedUrl }, 'Failed to fetch feed');
136 return [];
137 }
138}
139
140function parseRssFeed(xml: string, source: string): FeedItem[] {
141 const items: FeedItem[] = [];
142
143 const itemMatches =
144 xml.match(/<item[\s>][\s\S]*?<\/item>/gi) ?? xml.match(/<entry[\s>][\s\S]*?<\/entry>/gi) ?? [];
145
146 for (const itemXml of itemMatches.slice(0, 20)) {
147 const title = extractTag(itemXml, 'title') ?? '';
148 const link = extractLink(itemXml);
149 const description =
150 extractTag(itemXml, 'description') ??
151 extractTag(itemXml, 'summary') ??
152 extractTag(itemXml, 'content') ??
153 '';
154 const pubDate =
155 extractTag(itemXml, 'pubDate') ??
156 extractTag(itemXml, 'published') ??
157 extractTag(itemXml, 'updated') ??
158 '';
159
160 let plainDesc = description;
161 let prev = '';
162 while (prev !== plainDesc) {
163 prev = plainDesc;
164 plainDesc = plainDesc.replace(/<[^>]+>/g, '');
165 }
166 plainDesc = plainDesc.trim();
167 const excerpt = plainDesc.length > 200 ? plainDesc.slice(0, 200) + '...' : plainDesc;
168
169 if (title || link) {
170 items.push({
171 title: title.replace(/<!\[CDATA\[(.*?)\]\]>/g, '$1').trim(),
172 excerpt,
173 url: link ?? '',
174 timestamp: pubDate ? new Date(pubDate).toISOString() : '',
175 source,
176 });
177 }
178 }
179
180 return items;
181}
182
183function extractTag(xml: string, tag: string): string | null {
184 const match = xml.match(new RegExp(`<${tag}[^>]*>([\\s\\S]*?)<\\/${tag}>`, 'i'));
185 if (!match) return null;
186 const content = match[1];
187 if (!content) return null;
188 return content.replace(/<!\[CDATA\[([\s\S]*?)\]\]>/g, '$1').trim();
189}
190
191function extractLink(xml: string): string | null {
192 const linkTag = xml.match(/<link[^>]+href=["']([^"']+)["'][^>]*\/?>/i);
193 if (linkTag) {
194 const href = linkTag[1];
195 return href ?? null;
196 }
197
198 const linkContent = extractTag(xml, 'link');
199 return linkContent;
200}