about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Aryan Goharzad <arrygoo@gmail.com> 2023-01-19 13:53:11 -0500
committer GitHub <noreply@github.com> 2023-01-19 12:53:11 -0600
commit f10a8308d9f6bfb907c8a2458cbf78b4cfad88d2 (patch)
tree 0cb50ba6736ea67773e76f9000d07095a654bb6d /src
parent 9230d52ff596056429a773298b2728619afe3432 (diff)
download voidsky-f10a8308d9f6bfb907c8a2458cbf78b4cfad88d2.tar.zst
Fixes youtube embed issues (#50)
* fixes youtube embed

* move extractMetaHtml test to its own file

* tests cleanup

* Add fallback for youtube meta data

* lint

* Check for youtube in the url domain

* use hostname instead of full url to check for link domain

* checks only for domain
Diffstat (limited to 'src')
-rw-r--r-- src/lib/extractHtmlMeta.ts 65
-rw-r--r-- src/lib/extractYoutubeMeta.ts 26
-rw-r--r-- src/lib/link-meta.ts 8
-rw-r--r-- src/lib/strings.ts 51
-rw-r--r-- src/view/com/posts/FeedItem.tsx 8
5 files changed, 103 insertions, 55 deletions
diff --git a/src/lib/extractHtmlMeta.ts b/src/lib/extractHtmlMeta.ts
new file mode 100644
index 000000000..2517262be
--- /dev/null
+++ b/src/lib/extractHtmlMeta.ts
@@ -0,0 +1,65 @@
+import {extractYoutubeMeta} from './extractYoutubeMeta'
+
+interface ExtractHtmlMetaInput {
+  html: string
+  hostname?: string
+}
+
+export const extractHtmlMeta = ({
+  html,
+  hostname,
+}: ExtractHtmlMetaInput): Record<string, string> => {
+  const htmlTitleRegex = /<title>([^<]+)<\/title>/i
+
+  let res: Record<string, string> = {}
+
+  const match = htmlTitleRegex.exec(html)
+
+  if (match) {
+    res.title = match[1].trim()
+  }
+
+  let metaMatch
+  let propMatch
+  const metaRe = /<meta[\s]([^>]+)>/gis
+  while ((metaMatch = metaRe.exec(html))) {
+    let propName
+    let propValue
+    const propRe = /(name|property|content)="([^"]+)"/gis
+    while ((propMatch = propRe.exec(metaMatch[1]))) {
+      if (propMatch[1] === 'content') {
+        propValue = propMatch[2]
+      } else {
+        propName = propMatch[2]
+      }
+    }
+    if (!propName || !propValue) {
+      continue
+    }
+    switch (propName?.trim()) {
+      case 'title':
+      case 'og:title':
+      case 'twitter:title':
+        res.title = propValue?.trim()
+        break
+      case 'description':
+      case 'og:description':
+      case 'twitter:description':
+        res.description = propValue?.trim()
+        break
+      case 'og:image':
+      case 'twitter:image':
+        res.image = propValue?.trim()
+        break
+    }
+  }
+
+  const isYoutubeUrl =
+    hostname?.includes('youtube.') || hostname?.includes('youtu.be')
+  if (isYoutubeUrl) {
+    // Workaround for Youtube not having a title in the meta tags
+    res = {...res, ...extractYoutubeMeta(html)}
+  }
+
+  return res
+}
diff --git a/src/lib/extractYoutubeMeta.ts b/src/lib/extractYoutubeMeta.ts
new file mode 100644
index 000000000..566e3be46
--- /dev/null
+++ b/src/lib/extractYoutubeMeta.ts
@@ -0,0 +1,26 @@
+export const extractYoutubeMeta = (html: string): Record<string, string> => {
+  const res: Record<string, string> = {}
+  const youtubeTitleRegex = /"videoDetails":.*"title":"([^"]*)"/i
+  const youtubeDescriptionRegex =
+    /"videoDetails":.*"shortDescription":"([^"]*)"/i
+  const youtubeThumbnailRegex = /"videoDetails":.*"url":"(.*)(default\.jpg)/i
+
+  const youtubeTitleMatch = youtubeTitleRegex.exec(html)
+  const youtubeDescriptionMatch = youtubeDescriptionRegex.exec(html)
+  const youtubeThumbnailMatch = youtubeThumbnailRegex.exec(html)
+
+  if (youtubeTitleMatch && youtubeTitleMatch.length >= 1) {
+    res.title = decodeURI(youtubeTitleMatch[1])
+  }
+  if (youtubeDescriptionMatch && youtubeDescriptionMatch.length >= 1) {
+    res.description = decodeURI(youtubeDescriptionMatch[1]).replace(
+      /\\n/g,
+      '\n',
+    )
+  }
+  if (youtubeThumbnailMatch && youtubeThumbnailMatch.length >= 2) {
+    res.image = youtubeThumbnailMatch[1] + 'default.jpg'
+  }
+
+  return res
+}
diff --git a/src/lib/link-meta.ts b/src/lib/link-meta.ts
index 9a0325c8f..7e0964c17 100644
--- a/src/lib/link-meta.ts
+++ b/src/lib/link-meta.ts
@@ -1,7 +1,8 @@
 import he from 'he'
-import {extractHtmlMeta, isBskyAppUrl} from './strings'
+import {isBskyAppUrl} from './strings'
 import {RootStoreModel} from '../state'
 import {extractBskyMeta} from './extractBskyMeta'
+import {extractHtmlMeta} from './extractHtmlMeta'
 
 export enum LikelyType {
   HTML,
@@ -59,7 +60,10 @@ export async function getLinkMeta(
     })
     const httpResBody = await httpRes.text()
     clearTimeout(to)
-    const httpResMeta = extractHtmlMeta(httpResBody)
+    const httpResMeta = extractHtmlMeta({
+      html: httpResBody,
+      hostname: urlp?.hostname,
+    })
     meta.title = httpResMeta.title ? he.decode(httpResMeta.title) : undefined
     meta.description = httpResMeta.description
       ? he.decode(httpResMeta.description)
diff --git a/src/lib/strings.ts b/src/lib/strings.ts
index 04d8656f7..77fe222e4 100644
--- a/src/lib/strings.ts
+++ b/src/lib/strings.ts
@@ -265,54 +265,3 @@ export function convertBskyAppUrlIfNeeded(url: string): string {
   }
   return url
 }
-
-const htmlTitleRegex = /<title>([^<]+)<\/title>/i
-export function extractHtmlMeta(html: string): Record<string, string> {
-  const res: Record<string, string> = {}
-
-  {
-    const match = htmlTitleRegex.exec(html)
-    if (match) {
-      res.title = match[1].trim()
-    }
-  }
-
-  {
-    let metaMatch
-    let propMatch
-    const metaRe = /<meta[\s]([^>]+)>/gis
-    while ((metaMatch = metaRe.exec(html))) {
-      let propName
-      let propValue
-      const propRe = /(name|property|content)="([^"]+)"/gis
-      while ((propMatch = propRe.exec(metaMatch[1]))) {
-        if (propMatch[1] === 'content') {
-          propValue = propMatch[2]
-        } else {
-          propName = propMatch[2]
-        }
-      }
-      if (!propName || !propValue) {
-        continue
-      }
-      switch (propName?.trim()) {
-        case 'title':
-        case 'og:title':
-        case 'twitter:title':
-          res.title = propValue?.trim()
-          break
-        case 'description':
-        case 'og:description':
-        case 'twitter:description':
-          res.description = propValue?.trim()
-          break
-        case 'og:image':
-        case 'twitter:image':
-          res.image = propValue?.trim()
-          break
-      }
-    }
-  }
-
-  return res
-}
diff --git a/src/view/com/posts/FeedItem.tsx b/src/view/com/posts/FeedItem.tsx
index 54823d844..4133c17d4 100644
--- a/src/view/com/posts/FeedItem.tsx
+++ b/src/view/com/posts/FeedItem.tsx
@@ -39,7 +39,9 @@ export const FeedItem = observer(function ({
   const itemTitle = `Post by ${item.post.author.handle}`
   const authorHref = `/profile/${item.post.author.handle}`
   const replyAuthorDid = useMemo(() => {
-    if (!record?.reply) return ''
+    if (!record?.reply) {
+      return ''
+    }
     const urip = new AtUri(record.reply.parent?.uri || record.reply.root.uri)
     return urip.hostname
   }, [record?.reply])
@@ -196,7 +198,9 @@ export const FeedItem = observer(function ({
             ) : (
               <View style={{height: 5}} />
             )}
-            <PostEmbeds embed={item.post.embed} style={styles.embed} />
+            {item.post.embed ? (
+              <PostEmbeds embed={item.post.embed} style={styles.embed} />
+            ) : null}
             <PostCtrls
               style={styles.ctrls}
               itemHref={itemHref}