blob: 038ca81c605df72207a89a716243cfe7353f3b1b (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
|
import {extractTwitterMeta} from './extractTwitterMeta'
import {extractYoutubeMeta} from './extractYoutubeMeta'
interface ExtractHtmlMetaInput {
html: string
hostname?: string
pathname?: string
}
export const extractHtmlMeta = ({
html,
hostname,
pathname,
}: ExtractHtmlMetaInput): Record<string, string> => {
const htmlTitleRegex = /<title.*>([^<]+)<\/title>/i
let res: Record<string, string> = {}
const match = htmlTitleRegex.exec(html)
if (match) {
res.title = match[1].trim()
}
let metaMatch
let propMatch
const metaRe = /<meta[\s]([^>]+)>/gis
while ((metaMatch = metaRe.exec(html))) {
let propName
let propValue
const propRe = /(name|property|content)="([^"]+)"/gis
while ((propMatch = propRe.exec(metaMatch[1]))) {
if (propMatch[1] === 'content') {
propValue = propMatch[2]
} else {
propName = propMatch[2]
}
}
if (!propName || !propValue) {
continue
}
switch (propName?.trim()) {
case 'title':
case 'og:title':
case 'twitter:title':
res.title = propValue?.trim()
break
case 'description':
case 'og:description':
case 'twitter:description':
res.description = propValue?.trim()
break
case 'og:image':
case 'twitter:image':
res.image = propValue?.trim()
break
}
}
const isYoutubeUrl =
hostname?.includes('youtube.') || hostname?.includes('youtu.be')
const isTwitterUrl = hostname?.includes('twitter.')
// Workaround for some websites not having a title or description in the meta tags in the initial serve
if (isYoutubeUrl) {
res = {...res, ...extractYoutubeMeta(html)}
} else if (isTwitterUrl) {
res = {...extractTwitterMeta({pathname})}
}
return res
}
|