about summary refs log tree commit diff
path: root/src/lib
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib')
-rw-r--r-- src/lib/extractHtmlMeta.ts | 65
-rw-r--r-- src/lib/extractYoutubeMeta.ts | 26
-rw-r--r-- src/lib/link-meta.ts | 8
-rw-r--r-- src/lib/strings.ts | 51
4 files changed, 97 insertions, 53 deletions
diff --git a/src/lib/extractHtmlMeta.ts b/src/lib/extractHtmlMeta.ts
new file mode 100644
index 000000000..2517262be
--- /dev/null
+++ b/src/lib/extractHtmlMeta.ts
@@ -0,0 +1,65 @@
+import {extractYoutubeMeta} from './extractYoutubeMeta'
+
+/** Arguments accepted by `extractHtmlMeta`. */
+interface ExtractHtmlMetaInput {
+  /** Raw HTML text that is scanned for `<title>` and `<meta>` tags. */
+  html: string
+  /**
+   * Hostname of the page the HTML came from. When it contains `youtube.` or
+   * `youtu.be`, the result is additionally merged with `extractYoutubeMeta`.
+   */
+  hostname?: string
+}
+
+/**
+ * Extracts link-preview metadata (`title`, `description`, `image`) from an
+ * HTML document using regular expressions (no DOM parsing).
+ *
+ * Resolution order:
+ *  1. `<title>` seeds `title`.
+ *  2. Every `<meta>` tag is visited in document order; recognised
+ *     `name`/`property` values overwrite earlier ones (last tag wins).
+ *  3. For YouTube hostnames the result is merged with `extractYoutubeMeta`,
+ *     whose fields take precedence.
+ *
+ * Attribute names and meta names are compared case-insensitively, and
+ * attribute values may be wrapped in double or single quotes.
+ *
+ * @param input.html - Raw HTML text to scan.
+ * @param input.hostname - Hostname of the page, used only to detect YouTube.
+ * @returns An object holding whichever of `title`, `description` and `image`
+ *   were found; values are trimmed but not HTML-entity decoded.
+ */
+export const extractHtmlMeta = ({
+  html,
+  hostname,
+}: ExtractHtmlMetaInput): Record<string, string> => {
+  const htmlTitleRegex = /<title>([^<]+)<\/title>/i
+  const metaRe = /<meta[\s]([^>]+)>/gis
+  // Group 2 holds a double-quoted value, group 3 a single-quoted one.
+  const propRe = /(name|property|content)=(?:"([^"]+)"|'([^']+)')/gis
+
+  let res: Record<string, string> = {}
+
+  const rawTitle = htmlTitleRegex.exec(html)?.[1]
+  if (rawTitle !== undefined) {
+    res.title = rawTitle.trim()
+  }
+
+  // `matchAll` works on a clone of the regex, so the shared `g` regexes above
+  // never carry `lastIndex` state from one tag to the next.
+  for (const metaMatch of html.matchAll(metaRe)) {
+    let propName: string | undefined
+    let propValue: string | undefined
+    for (const propMatch of (metaMatch[1] ?? '').matchAll(propRe)) {
+      const attrValue = propMatch[2] ?? propMatch[3]
+      // The regex is case-insensitive, so normalise before comparing;
+      // otherwise `CONTENT="..."` would be mistaken for the tag's name.
+      if (propMatch[1]?.toLowerCase() === 'content') {
+        propValue = attrValue
+      } else {
+        propName = attrValue
+      }
+    }
+    if (!propName || !propValue) {
+      continue
+    }
+    switch (propName.trim().toLowerCase()) {
+      case 'title':
+      case 'og:title':
+      case 'twitter:title':
+        res.title = propValue.trim()
+        break
+      case 'description':
+      case 'og:description':
+      case 'twitter:description':
+        res.description = propValue.trim()
+        break
+      case 'og:image':
+      case 'twitter:image':
+        res.image = propValue.trim()
+        break
+    }
+  }
+
+  const isYoutubeUrl =
+    hostname?.includes('youtube.') || hostname?.includes('youtu.be')
+  if (isYoutubeUrl) {
+    // Workaround for Youtube not having a title in the meta tags
+    res = {...res, ...extractYoutubeMeta(html)}
+  }
+
+  return res
+}
diff --git a/src/lib/extractYoutubeMeta.ts b/src/lib/extractYoutubeMeta.ts
new file mode 100644
index 000000000..566e3be46
--- /dev/null
+++ b/src/lib/extractYoutubeMeta.ts
@@ -0,0 +1,26 @@
+/**
+ * Decodes the body of a JSON string literal (the text between the quotes),
+ * resolving escapes such as `\"`, `\n` and `\uXXXX`.
+ * Returns `undefined` when the text is not a valid JSON string body.
+ */
+const decodeJsonStringBody = (raw: string): string | undefined => {
+  try {
+    const parsed: unknown = JSON.parse(`"${raw}"`)
+    return typeof parsed === 'string' ? parsed : undefined
+  } catch {
+    return undefined
+  }
+}
+
+/**
+ * Percent-decodes `text` like `decodeURI`, but returns the input unchanged
+ * instead of throwing `URIError` on a malformed sequence (e.g. "100% real").
+ */
+const decodeURIOrRaw = (text: string): string => {
+  try {
+    return decodeURI(text)
+  } catch {
+    return text
+  }
+}
+
+/**
+ * Extracts `title`, `description` and `image` from the `videoDetails` JSON
+ * that YouTube embeds in its watch-page HTML.
+ *
+ * @param html - Raw HTML of a YouTube page.
+ * @returns An object containing only the fields that were found. Never throws
+ *   on malformed input.
+ */
+export const extractYoutubeMeta = (html: string): Record<string, string> => {
+  const res: Record<string, string> = {}
+  // Lazy `.*?` binds to the first key after "videoDetails" (the video's own
+  // field) rather than the last such key on the line. The value pattern
+  // accepts backslash escapes so an escaped quote does not end the capture.
+  const youtubeTitleRegex = /"videoDetails":.*?"title":"((?:[^"\\]|\\.)*)"/i
+  const youtubeDescriptionRegex =
+    /"videoDetails":.*?"shortDescription":"((?:[^"\\]|\\.)*)"/i
+  // Greedy on purpose: keeps selecting the last thumbnail entry, but the URL
+  // capture may no longer run past its closing quote into unrelated JSON.
+  const youtubeThumbnailRegex = /"videoDetails":.*"url":"([^"]*)default\.jpg/i
+
+  const rawTitle = youtubeTitleRegex.exec(html)?.[1]
+  const rawDescription = youtubeDescriptionRegex.exec(html)?.[1]
+  const thumbnailPrefix = youtubeThumbnailRegex.exec(html)?.[1]
+
+  if (rawTitle !== undefined) {
+    res.title = decodeURIOrRaw(decodeJsonStringBody(rawTitle) ?? rawTitle)
+  }
+  if (rawDescription !== undefined) {
+    res.description = decodeURIOrRaw(
+      // If the text is not valid JSON, fall back to the previous behaviour of
+      // only expanding literal "\n" sequences.
+      decodeJsonStringBody(rawDescription) ??
+        rawDescription.replace(/\\n/g, '\n'),
+    )
+  }
+  if (thumbnailPrefix !== undefined) {
+    res.image = thumbnailPrefix + 'default.jpg'
+  }
+
+  return res
+}
diff --git a/src/lib/link-meta.ts b/src/lib/link-meta.ts
index 9a0325c8f..7e0964c17 100644
--- a/src/lib/link-meta.ts
+++ b/src/lib/link-meta.ts
@@ -1,7 +1,8 @@
 import he from 'he'
-import {extractHtmlMeta, isBskyAppUrl} from './strings'
+import {isBskyAppUrl} from './strings'
 import {RootStoreModel} from '../state'
 import {extractBskyMeta} from './extractBskyMeta'
+import {extractHtmlMeta} from './extractHtmlMeta'
 
 export enum LikelyType {
   HTML,
@@ -59,7 +60,10 @@ export async function getLinkMeta(
     })
     const httpResBody = await httpRes.text()
     clearTimeout(to)
-    const httpResMeta = extractHtmlMeta(httpResBody)
+    const httpResMeta = extractHtmlMeta({
+      html: httpResBody,
+      hostname: urlp?.hostname,
+    })
     meta.title = httpResMeta.title ? he.decode(httpResMeta.title) : undefined
     meta.description = httpResMeta.description
       ? he.decode(httpResMeta.description)
diff --git a/src/lib/strings.ts b/src/lib/strings.ts
index 04d8656f7..77fe222e4 100644
--- a/src/lib/strings.ts
+++ b/src/lib/strings.ts
@@ -265,54 +265,3 @@ export function convertBskyAppUrlIfNeeded(url: string): string {
   }
   return url
 }
-
-const htmlTitleRegex = /<title>([^<]+)<\/title>/i
-export function extractHtmlMeta(html: string): Record<string, string> {
-  const res: Record<string, string> = {}
-
-  {
-    const match = htmlTitleRegex.exec(html)
-    if (match) {
-      res.title = match[1].trim()
-    }
-  }
-
-  {
-    let metaMatch
-    let propMatch
-    const metaRe = /<meta[\s]([^>]+)>/gis
-    while ((metaMatch = metaRe.exec(html))) {
-      let propName
-      let propValue
-      const propRe = /(name|property|content)="([^"]+)"/gis
-      while ((propMatch = propRe.exec(metaMatch[1]))) {
-        if (propMatch[1] === 'content') {
-          propValue = propMatch[2]
-        } else {
-          propName = propMatch[2]
-        }
-      }
-      if (!propName || !propValue) {
-        continue
-      }
-      switch (propName?.trim()) {
-        case 'title':
-        case 'og:title':
-        case 'twitter:title':
-          res.title = propValue?.trim()
-          break
-        case 'description':
-        case 'og:description':
-        case 'twitter:description':
-          res.description = propValue?.trim()
-          break
-        case 'og:image':
-        case 'twitter:image':
-          res.image = propValue?.trim()
-          break
-      }
-    }
-  }
-
-  return res
-}