about summary refs log tree commit diff
path: root/src/lib/link-meta/html.ts
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/link-meta/html.ts')
-rw-r--r--src/lib/link-meta/html.ts71
1 files changed, 71 insertions, 0 deletions
diff --git a/src/lib/link-meta/html.ts b/src/lib/link-meta/html.ts
new file mode 100644
index 000000000..220f8431d
--- /dev/null
+++ b/src/lib/link-meta/html.ts
@@ -0,0 +1,71 @@
+import {extractTwitterMeta} from './twitter'
+import {extractYoutubeMeta} from './youtube'
+
+/**
+ * Input accepted by `extractHtmlMeta`.
+ */
+interface ExtractHtmlMetaInput {
+  /** Raw HTML text that is scanned for `<title>` and `<meta>` tags. */
+  html: string
+  /** Hostname of the page; only used to detect YouTube and Twitter links. */
+  hostname?: string
+  /** Path of the page; only used for Twitter links, where it is passed to `extractTwitterMeta`. */
+  pathname?: string
+}
+
+/**
+ * Extracts link-preview metadata from a page's raw HTML using regular
+ * expressions (no DOM parsing).
+ *
+ * The `<title>` element seeds `title`; `<meta>` tags then fill or override
+ * `title`, `description` and `image` from their plain, `og:` and `twitter:`
+ * variants. When several tags map to the same field, the last one in the
+ * document wins. Blank values are ignored so they never clobber a real one.
+ *
+ * @param input.html Raw HTML text to scan.
+ * @param input.hostname Optional hostname, used to pick a site-specific extractor.
+ * @param input.pathname Optional path; the Twitter extractor only runs when it is set.
+ * @returns An object holding whichever of `title`, `description` and `image`
+ *   were found, combined with whatever the site-specific extractor returns.
+ */
+export const extractHtmlMeta = ({
+  html,
+  hostname,
+  pathname,
+}: ExtractHtmlMetaInput): Record<string, string> => {
+  // `[^>]*` (instead of a greedy `.*`) keeps the match inside the opening tag,
+  // so a later <title> on the same line (e.g. inside an inline SVG of a
+  // minified page) cannot be captured in place of the document title.
+  const htmlTitleRegex = /<title(?:\s[^>]*)?>([^<]+)<\/title>/i
+
+  let res: Record<string, string> = {}
+
+  const titleMatch = htmlTitleRegex.exec(html)
+  if (titleMatch) {
+    const title = titleMatch[1].trim()
+    if (title) {
+      res.title = title
+    }
+  }
+
+  const metaRe = /<meta\s([^>]+)>/gi
+  // Matches every quoted attribute (`name="value"` or `name='value'`). The
+  // whole attribute name is captured so that `data-content` or `data-name`
+  // are not mistaken for `content` or `name`.
+  const attrRe = /([^\s"'=<>\/]+)\s*=\s*(?:"([^"]*)"|'([^']*)')/g
+  let metaMatch: RegExpExecArray | null
+  while ((metaMatch = metaRe.exec(html))) {
+    let propName: string | undefined
+    let propValue: string | undefined
+    // The regex is global and shared across tags, so restart it for each tag.
+    attrRe.lastIndex = 0
+    let attrMatch: RegExpExecArray | null
+    while ((attrMatch = attrRe.exec(metaMatch[1]))) {
+      // Attribute names are case-insensitive in HTML.
+      const attrName = attrMatch[1].toLowerCase()
+      const attrValue = (attrMatch[2] ?? attrMatch[3] ?? '').trim()
+      if (attrName === 'content') {
+        propValue = attrValue
+      } else if (attrName === 'name' || attrName === 'property') {
+        // Lower-cased so that e.g. `name="Description"` is recognised.
+        propName = attrValue.toLowerCase()
+      }
+    }
+    if (!propName || !propValue) {
+      continue
+    }
+    switch (propName) {
+      case 'title':
+      case 'og:title':
+      case 'twitter:title':
+        res.title = propValue
+        break
+      case 'description':
+      case 'og:description':
+      case 'twitter:description':
+        res.description = propValue
+        break
+      case 'og:image':
+      case 'twitter:image':
+        res.image = propValue
+        break
+    }
+  }
+
+  const isYoutubeUrl =
+    hostname?.includes('youtube.') || hostname?.includes('youtu.be')
+  const isTwitterUrl = hostname?.includes('twitter.')
+  // Workaround for some websites not having a title or description in the meta tags in the initial serve
+  if (isYoutubeUrl) {
+    // Values from the YouTube extractor take precedence over the scraped ones.
+    res = {...res, ...extractYoutubeMeta(html)}
+  } else if (isTwitterUrl && pathname) {
+    // The Twitter result replaces everything scraped above.
+    res = {...extractTwitterMeta({pathname})}
+  }
+
+  return res
+}