mirror of https://git.lenooby09.tech/LeNooby09/social-app.git
1import {extractTwitterMeta} from './twitter'
2import {extractYoutubeMeta} from './youtube'
3
/**
 * Arguments accepted by `extractHtmlMeta`.
 */
interface ExtractHtmlMetaInput {
  /** Raw HTML markup that is scanned for `<title>` and `<meta>` tags. */
  html: string
  /**
   * Hostname of the page the HTML came from. When present it is checked for
   * YouTube / Twitter substrings to select a site-specific extractor.
   */
  hostname?: string
  /**
   * Path of the page URL. Only forwarded to the Twitter extractor, and only
   * when the hostname looks like a Twitter host.
   */
  pathname?: string
}
9
/**
 * Extracts link-preview metadata (`title`, `description`, `image`) from raw
 * HTML using regular expressions.
 *
 * Resolution order:
 *  1. The first `<title>` element seeds `title`.
 *  2. Every `<meta>` tag is scanned in document order; a tag whose
 *     `name`/`property` is one of the recognised keys overwrites the
 *     corresponding field with its (trimmed) `content`, so the last matching
 *     tag wins.
 *  3. If `hostname` looks like YouTube, the result of `extractYoutubeMeta` is
 *     merged on top. If it looks like Twitter and a `pathname` is given, the
 *     result is *replaced* by whatever `extractTwitterMeta` returns.
 *
 * @param input.html     Raw HTML markup to scan.
 * @param input.hostname Optional hostname used to pick a site-specific extractor.
 * @param input.pathname Optional URL path, forwarded to the Twitter extractor.
 * @returns An object containing whichever of `title`, `description`, `image`
 *          could be found (plus any keys the site-specific extractors add).
 */
export const extractHtmlMeta = ({
  html,
  hostname,
  pathname,
}: ExtractHtmlMetaInput): Record<string, string> => {
  // Attributes on <title> are permitted, but they must be separated from the
  // tag name by whitespace and cannot extend past the tag's own `>`. A greedy
  // `.*` here would skip ahead to the *last* <title> on the same line (e.g. an
  // inline SVG <title> in minified HTML).
  const htmlTitleRegex = /<title(?:\s[^>]*)?>([^<]+)<\/title>/i
  const metaRe = /<meta[\s]([^>]+)>/gi
  // Matches name/property/content attributes with either quote style. The
  // lookbehind rejects look-alikes such as `data-content` or `itemname`.
  const attrRe =
    /(?<![\w:-])(name|property|content)\s*=\s*(?:"([^"]+)"|'([^']+)')/gi

  let res: Record<string, string> = {}

  const titleMatch = htmlTitleRegex.exec(html)
  if (titleMatch) {
    res.title = titleMatch[1].trim()
  }

  for (const metaMatch of html.matchAll(metaRe)) {
    let propName: string | undefined
    let propValue: string | undefined

    for (const attr of metaMatch[1].matchAll(attrRe)) {
      const attrValue = attr[2] ?? attr[3]
      // The regex is case-insensitive, so normalise before comparing;
      // otherwise `CONTENT="..."` would be mistaken for a name attribute.
      if (attr[1].toLowerCase() === 'content') {
        propValue = attrValue
      } else {
        propName = attrValue
      }
    }

    // A usable tag needs both a key (name/property) and a content value.
    if (!propName || !propValue) {
      continue
    }

    switch (propName.trim()) {
      case 'title':
      case 'og:title':
      case 'twitter:title':
        res.title = propValue.trim()
        break
      case 'description':
      case 'og:description':
      case 'twitter:description':
        res.description = propValue.trim()
        break
      case 'og:image':
      case 'twitter:image':
        res.image = propValue.trim()
        break
    }
  }

  const isYoutubeUrl =
    hostname?.includes('youtube.') || hostname?.includes('youtu.be')
  const isTwitterUrl = hostname?.includes('twitter.')
  // Workaround for some websites not having a title or description in the meta tags in the initial serve
  if (isYoutubeUrl) {
    res = {...res, ...extractYoutubeMeta(html)}
  } else if (isTwitterUrl && pathname) {
    // Deliberately discards everything parsed above and keeps only the
    // Twitter extractor's output.
    res = {...extractTwitterMeta({pathname})}
  }

  return res
}