Aethel Bot OSS repository! aethel.xyz
bot fun ai discord discord-bot aethel
at dev 3.8 kB view raw
import ogs from 'open-graph-scraper';
import logger from './logger';

/**
 * Link-preview metadata collected for a page. Every field is optional because
 * a page may expose any subset of OpenGraph / Twitter-card / Dublin Core tags.
 */
export interface OpenGraphData {
  title?: string;
  description?: string;
  image?: string;
  url?: string;
  siteName?: string;
  type?: string;
}

/**
 * Finds link candidates in free text. Two alternatives:
 *
 * 1. Anything starting with an explicit `http://` or `https://` scheme.
 * 2. A bare host name ending in one of the listed TLDs, optionally followed by
 *    a path.
 *
 * The bare-host alternative accepts one or more dot-terminated labels so that
 * `docs.github.com` or `example.co.uk` is captured whole instead of being cut
 * down to its last label. Each label must start and end with an alphanumeric
 * character, which keeps the pattern unambiguous (linear backtracking). The
 * lookbehind only lets a match start at a token boundary: never in the middle
 * of a word, directly after a `.`/`-`, or after the `@` of an e-mail address.
 */
const URL_REGEX =
  /https?:\/\/[^\s<>]+|(?<![\w@.-])(?:[a-zA-Z0-9](?:[-a-zA-Z0-9]*[a-zA-Z0-9])?\.)+(?:com|org|net|edu|gov|mil|int|xyz|io|co|me|ly|app|dev|tech|info|biz|name|tv|cc|uk|de|fr|jp|cn|au|us|ca|nl|be|it|es|ru|in|br|mx|ch|se|no|dk|fi|pl|cz|hu|ro|bg|hr|sk|si|ee|lv|lt|gr|pt|ie|at|lu)\b(?:\/[^\s<>]*)?/g;

/** How long a cache entry (hit or miss) stays valid: 24 hours, in milliseconds. */
const CACHE_TTL = 24 * 60 * 60 * 1000;

/**
 * Request timeout handed to open-graph-scraper. The fetch-based API of that
 * library (the one that accepts `fetchOptions`) interprets `timeout` in
 * SECONDS, so this is 5 seconds.
 */
const FETCH_TIMEOUT_SECONDS = 5;

/** Hosts that are never previewed (link shorteners and the local machine). */
const SKIP_DOMAINS = ['t.co', 'bit.ly', 'tinyurl.com', 'is.gd', 'localhost'];

/** The only URL schemes this module is willing to work with. */
const WEB_PROTOCOLS = ['http:', 'https:'];

/** In-memory cache keyed by URL; `data: null` records a failed or empty lookup. */
const cache = new Map<string, { data: OpenGraphData | null; timestamp: number }>();

/**
 * Reports whether `hostname` is an IPv4 literal in a loopback, link-local,
 * private or "this network" range.
 *
 * Expects the value of `URL#hostname`, where IPv4 hosts are already serialized
 * as dotted decimal. Host NAMES that merely resolve to such an address are not
 * detected here.
 */
function isPrivateIPv4(hostname: string): boolean {
  const quad = /^(\d{1,3})\.(\d{1,3})\.\d{1,3}\.\d{1,3}$/.exec(hostname);
  if (!quad) return false;

  const first = Number(quad[1]);
  const second = Number(quad[2]);

  return (
    first === 0 ||
    first === 10 ||
    first === 127 ||
    (first === 169 && second === 254) ||
    (first === 172 && second >= 16 && second <= 31) ||
    (first === 192 && second === 168)
  );
}

/**
 * Extracts up to three previewable http(s) URLs from `text`.
 *
 * Trailing punctuation is stripped from each match, scheme-less matches get an
 * `https://` prefix, and candidates are dropped when they fail to parse, have a
 * host without an interior dot, point at a skipped domain or a private IPv4
 * literal, or use a non-web protocol.
 *
 * @param text - Arbitrary (untrusted) message text.
 * @returns The accepted URLs in order of appearance; empty if none were found.
 */
export function extractUrls(text: string): string[] {
  const matches = text.match(URL_REGEX);
  if (!matches) return [];

  return matches
    .map((url) => {
      // Sentence punctuation glued to the end of a link is not part of it.
      let cleanUrl = url.replace(/[.,;:!?)\]}>'"]*$/, '');

      if (!cleanUrl.startsWith('http://') && !cleanUrl.startsWith('https://')) {
        cleanUrl = 'https://' + cleanUrl;
      }

      return cleanUrl;
    })
    .filter((url) => {
      try {
        const parsed = new URL(url);
        const hostname = parsed.hostname.toLowerCase();

        if (!hostname.includes('.') || hostname.endsWith('.') || hostname.startsWith('.')) {
          return false;
        }

        // `localhost` is on the skip list; its numeric equivalents and other
        // internal-only addresses must not be fetched either.
        if (isPrivateIPv4(hostname)) {
          return false;
        }

        return !SKIP_DOMAINS.includes(hostname) && WEB_PROTOCOLS.includes(parsed.protocol);
      } catch {
        // `new URL` throws on anything that is not a parseable absolute URL.
        return false;
      }
    })
    .slice(0, 3);
}

/**
 * Fetches link-preview metadata for `url`, using the in-memory cache.
 *
 * Both successful and unsuccessful lookups are cached for `CACHE_TTL`, so a
 * URL that fails is not retried until its entry expires.
 *
 * @param url - Absolute URL of the page to inspect.
 * @returns The collected metadata, or `null` when the scrape fails or the page
 *   yields no title, description or usable image. Never throws: errors are
 *   logged and reported as `null`.
 */
export async function fetchOpenGraphData(url: string): Promise<OpenGraphData | null> {
  try {
    const cached = cache.get(url);
    if (cached && Date.now() - cached.timestamp < CACHE_TTL) {
      return cached.data;
    }

    const options = {
      url,
      timeout: FETCH_TIMEOUT_SECONDS,
      fetchOptions: {
        headers: {
          'User-Agent': 'Aethel/2.0 (+https://aethel.xyz)',
        },
      },
    };

    const { error, result } = await ogs(options);

    if (error || !result) {
      cache.set(url, { data: null, timestamp: Date.now() });
      return null;
    }

    // Prefer the OpenGraph image, then the Twitter-card image, then the favicon.
    let imageUrl = result.ogImage?.[0]?.url || result.twitterImage?.[0]?.url || result.favicon;

    if (imageUrl && !isValidImageUrl(imageUrl)) {
      imageUrl = undefined;
    }

    const ogData: OpenGraphData = {
      title: result.ogTitle || result.twitterTitle || result.dcTitle,
      description: result.ogDescription || result.twitterDescription || result.dcDescription,
      image: imageUrl,
      url: result.ogUrl || result.twitterUrl || url,
      siteName: result.ogSiteName || result.twitterSite,
      type: result.ogType || 'website',
    };

    // Nothing worth showing: remember that so the page is not re-fetched.
    if (!ogData.title && !ogData.description && !ogData.image) {
      cache.set(url, { data: null, timestamp: Date.now() });
      return null;
    }

    cache.set(url, { data: ogData, timestamp: Date.now() });

    logger.debug(`Fetched OpenGraph data for ${url}:`, ogData);
    return ogData;
  } catch (error) {
    logger.warn(`Failed to fetch OpenGraph data for ${url}:`, error);
    cache.set(url, { data: null, timestamp: Date.now() });
    return null;
  }
}

/**
 * Convenience wrapper: extracts URLs from `text` and fetches metadata for the
 * first one.
 *
 * @param text - Arbitrary message text.
 * @returns Metadata for the first accepted URL, or `null` when the text holds
 *   no accepted URL or the lookup produced nothing.
 */
export async function extractFirstUrlMetadata(text: string): Promise<OpenGraphData | null> {
  const [firstUrl] = extractUrls(text);
  if (firstUrl === undefined) return null;

  return await fetchOpenGraphData(firstUrl);
}

/** Removes every cache entry older than `CACHE_TTL`. */
export function cleanupCache(): void {
  const now = Date.now();
  for (const [url, cached] of cache.entries()) {
    if (now - cached.timestamp > CACHE_TTL) {
      cache.delete(url);
    }
  }
}

/**
 * Reports whether `url` parses as an absolute http(s) URL. Relative references
 * are rejected because they cannot be parsed without a base.
 */
function isValidImageUrl(url: string): boolean {
  try {
    const parsed = new URL(url);
    return WEB_PROTOCOLS.includes(parsed.protocol);
  } catch {
    return false;
  }
}

// Sweep expired cache entries once an hour. The timer is unref'd so that merely
// importing this module does not keep the Node.js process alive.
setInterval(cleanupCache, 60 * 60 * 1000).unref();