// Aethel Bot OSS repository — https://aethel.xyz
// Topics: bot, fun, ai, discord, discord-bot, aethel
import { isIP } from 'node:net';

import ogs from 'open-graph-scraper';
import logger from './logger';
3
/**
 * Link-preview metadata extracted from a page's Open Graph / Twitter Card tags.
 * Every field is optional because a page may expose any subset of them.
 */
export interface OpenGraphData {
  /** Page title. */
  title?: string;
  /** Short summary of the page. */
  description?: string;
  /** Absolute http(s) URL of a preview image. */
  image?: string;
  /** URL the preview refers to; falls back to the requested URL. */
  url?: string;
  /** Name of the site that hosts the page. */
  siteName?: string;
  /** Open Graph object type (e.g. `website`). */
  type?: string;
}
12
/**
 * Matches URL-like tokens in free-form text.
 *
 * Two alternatives:
 *  1. Anything starting with `http://` or `https://`, up to whitespace or `<`/`>`.
 *  2. A bare host name ending in one of the listed TLDs, with an optional path.
 *
 * Notes on the bare-host alternative:
 *  - One or more dot-terminated labels are accepted, so `docs.example.com` is
 *    captured whole instead of being truncated to `example.com`.
 *  - The look-behind refuses to start in the middle of a word, after a dot or
 *    hyphen, or after `@`, so the domain part of an e-mail address is not
 *    treated as a link.
 *  - Each label is `[a-zA-Z0-9][-a-zA-Z0-9]*` followed by a literal dot; there
 *    are no adjacent overlapping quantifiers, which keeps matching linear on
 *    long alphanumeric runs.
 *
 * The `g` flag is required: callers use `String.prototype.match` to collect
 * every occurrence.
 */
const URL_REGEX =
  /https?:\/\/[^\s<>]+|(?<![\w@.-])(?:[a-zA-Z0-9][-a-zA-Z0-9]*\.)+(?:com|org|net|edu|gov|mil|int|xyz|io|co|me|ly|app|dev|tech|info|biz|name|tv|cc|uk|de|fr|jp|cn|au|us|ca|nl|be|it|es|ru|in|br|mx|ch|se|no|dk|fi|pl|cz|hu|ro|bg|hr|sk|si|ee|lv|lt|gr|pt|ie|at|lu)\b(?:\/[^\s<>]*)?/g;
/** How long a cached lookup (successful or not) stays valid: 24 hours, in milliseconds. */
const CACHE_TTL = 24 * 60 * 60 * 1000;

/**
 * In-memory lookup cache keyed by the requested URL.
 * `data` is `null` when the lookup failed or yielded nothing usable;
 * `timestamp` is the `Date.now()` value at which the entry was stored.
 */
const cache = new Map<string, { data: OpenGraphData | null; timestamp: number }>();
17
/**
 * Reports whether `hostname` is an IPv4 literal in a range that must never be
 * fetched on behalf of a user: "this network" (0/8), loopback (127/8),
 * private (10/8, 172.16/12, 192.168/16), carrier-grade NAT (100.64/10),
 * link-local incl. cloud metadata endpoints (169.254/16), and
 * multicast/reserved (224/3).
 */
function isNonPublicIPv4(hostname: string): boolean {
  if (isIP(hostname) !== 4) return false;

  const [a = 0, b = 0] = hostname.split('.').map(Number);
  return (
    a === 0 ||
    a === 10 ||
    a === 127 ||
    (a === 100 && b >= 64 && b <= 127) ||
    (a === 169 && b === 254) ||
    (a === 172 && b >= 16 && b <= 31) ||
    (a === 192 && b === 168) ||
    a >= 224
  );
}

/**
 * Extracts up to three distinct, fetchable http(s) URLs from free-form text.
 *
 * Each candidate matched by `URL_REGEX` is stripped of trailing punctuation,
 * given an `https://` scheme when it has none, and then validated. A candidate
 * is dropped when:
 *  - it does not parse as a URL or is not http/https;
 *  - its host has no dot, or starts/ends with a dot;
 *  - its host is a known link shortener or `localhost` (incl. `*.localhost`);
 *  - its host is an IPv4 literal in a loopback/private/link-local range
 *    (the input is untrusted, so these would let a user point the fetcher at
 *    internal services);
 *  - the same URL was already collected (so repeats do not use up the cap).
 *
 * Note: only literal addresses are screened; a public DNS name that resolves
 * to an internal address is not detected here.
 *
 * @param text - Arbitrary text, e.g. a chat message.
 * @returns At most three URLs, in order of first appearance.
 */
export function extractUrls(text: string): string[] {
  const matches = text.match(URL_REGEX);
  if (!matches) return [];

  const maxUrls = 3;
  const skipDomains = ['t.co', 'bit.ly', 'tinyurl.com', 'is.gd', 'localhost'];
  const seen = new Set<string>();
  const urls: string[] = [];

  for (const match of matches) {
    // Sentence punctuation that happens to follow a link is not part of it.
    let cleanUrl = match.replace(/[.,;:!?)\]}>'"]*$/, '');

    if (!cleanUrl.startsWith('http://') && !cleanUrl.startsWith('https://')) {
      cleanUrl = 'https://' + cleanUrl;
    }

    let parsed: URL;
    try {
      parsed = new URL(cleanUrl);
    } catch {
      continue;
    }

    const hostname = parsed.hostname.toLowerCase();

    if (!hostname.includes('.') || hostname.endsWith('.') || hostname.startsWith('.')) {
      continue;
    }
    if (!['http:', 'https:'].includes(parsed.protocol)) continue;
    if (skipDomains.includes(hostname) || hostname.endsWith('.localhost')) continue;
    // The URL parser normalises numeric hosts (decimal, hex, short forms) to
    // dotted-quad, so checking the parsed hostname covers those spellings too.
    if (isNonPublicIPv4(hostname)) continue;

    if (seen.has(cleanUrl)) continue;
    seen.add(cleanUrl);
    urls.push(cleanUrl);

    if (urls.length === maxUrls) break;
  }

  return urls;
}
49
/**
 * Fetches Open Graph / Twitter Card metadata for a page, with in-memory caching.
 *
 * Every outcome is cached under the requested URL for `CACHE_TTL`, including
 * failures (stored as `null`), so a URL is scraped at most once per TTL window.
 * This function never throws: any error is logged as a warning and reported as
 * `null`.
 *
 * @param url - Absolute URL of the page to scrape.
 * @returns The extracted metadata, or `null` when the request fails or the
 *          page exposes no title, description or usable image.
 */
export async function fetchOpenGraphData(url: string): Promise<OpenGraphData | null> {
  try {
    const cached = cache.get(url);
    if (cached && Date.now() - cached.timestamp < CACHE_TTL) {
      return cached.data;
    }

    const options = {
      url,
      // The fetch-based open-graph-scraper API (the one that accepts
      // `fetchOptions`) takes the timeout in SECONDS. The previous value of
      // 5000 therefore meant ~83 minutes rather than the intended 5 seconds.
      timeout: 5,
      fetchOptions: {
        headers: {
          'User-Agent': 'Aethel/2.0 (+https://aethel.xyz)',
        },
      },
    };

    const { error, result } = await ogs(options);

    if (error || !result) {
      cache.set(url, { data: null, timestamp: Date.now() });
      return null;
    }

    // Preference order: Open Graph image, then Twitter Card image, then favicon.
    let imageUrl: string | undefined =
      result.ogImage?.[0]?.url || result.twitterImage?.[0]?.url || result.favicon;

    if (imageUrl && !isValidImageUrl(imageUrl)) {
      // Not an absolute http(s) URL as given. Pages frequently declare images
      // and favicons as relative or protocol-relative paths, so try resolving
      // against the page URL before giving up on it.
      try {
        const resolved = new URL(imageUrl, url).href;
        imageUrl = isValidImageUrl(resolved) ? resolved : undefined;
      } catch {
        imageUrl = undefined;
      }
    }

    const ogData: OpenGraphData = {
      title: result.ogTitle || result.twitterTitle || result.dcTitle,
      description: result.ogDescription || result.twitterDescription || result.dcDescription,
      image: imageUrl,
      url: result.ogUrl || result.twitterUrl || url,
      siteName: result.ogSiteName || result.twitterSite,
      type: result.ogType || 'website',
    };

    // Nothing worth rendering: treat the same as a failed lookup.
    if (!ogData.title && !ogData.description && !ogData.image) {
      cache.set(url, { data: null, timestamp: Date.now() });
      return null;
    }

    cache.set(url, { data: ogData, timestamp: Date.now() });

    logger.debug(`Fetched OpenGraph data for ${url}:`, ogData);
    return ogData;
  } catch (error) {
    logger.warn(`Failed to fetch OpenGraph data for ${url}:`, error);
    cache.set(url, { data: null, timestamp: Date.now() });
    return null;
  }
}
104
/**
 * Looks up link-preview metadata for the first usable URL found in `text`.
 *
 * @param text - Arbitrary text that may contain links.
 * @returns Metadata for the first URL returned by `extractUrls`, or `null`
 *          when the text contains no usable URL or the lookup yields nothing.
 */
export async function extractFirstUrlMetadata(text: string): Promise<OpenGraphData | null> {
  // Destructuring keeps this sound under `noUncheckedIndexedAccess`.
  const [firstUrl] = extractUrls(text);
  if (firstUrl === undefined) return null;

  return fetchOpenGraphData(firstUrl);
}
111
/**
 * Evicts every cache entry whose age has reached `CACHE_TTL`.
 *
 * Uses `>=` so that eviction agrees with the freshness check on read
 * (`age < CACHE_TTL`): an entry that is no longer served is also removed.
 */
export function cleanupCache(): void {
  const now = Date.now();
  // Deleting the current key while iterating a Map is safe in JavaScript.
  for (const [url, entry] of cache) {
    if (now - entry.timestamp >= CACHE_TTL) {
      cache.delete(url);
    }
  }
}
120
/**
 * Reports whether `url` is an absolute URL using the http or https scheme.
 * Only the scheme is checked; the target is not verified to be an image.
 *
 * @param url - Candidate image URL.
 * @returns `true` for absolute http(s) URLs, `false` for anything else,
 *          including strings that do not parse as a URL (e.g. relative paths).
 */
function isValidImageUrl(url: string): boolean {
  try {
    const { protocol } = new URL(url);
    return protocol === 'http:' || protocol === 'https:';
  } catch {
    return false;
  }
}
129
// Sweep expired cache entries once an hour. The timer is unref'd so that this
// housekeeping task alone never keeps the Node.js process (or a test runner
// that merely imports this module) from exiting.
setInterval(cleanupCache, 60 * 60 * 1000).unref();