diff options
author | Aryan Goharzad <arrygoo@gmail.com> | 2023-01-19 13:53:11 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-01-19 12:53:11 -0600 |
commit | f10a8308d9f6bfb907c8a2458cbf78b4cfad88d2 (patch) | |
tree | 0cb50ba6736ea67773e76f9000d07095a654bb6d /src | |
parent | 9230d52ff596056429a773298b2728619afe3432 (diff) | |
download | voidsky-f10a8308d9f6bfb907c8a2458cbf78b4cfad88d2.tar.zst |
Fixes youtube embed issues (#50)
* fixes youtube embed
* move extractMetaHtml test to its own file
* tests cleanup
* Add fallback for youtube meta data
* lint
* Check for youtube in the url domain
* use hostname instead of full url to check for link domain
* checks only for domain
Diffstat (limited to 'src')
-rw-r--r-- | src/lib/extractHtmlMeta.ts | 65 | ||||
-rw-r--r-- | src/lib/extractYoutubeMeta.ts | 26 | ||||
-rw-r--r-- | src/lib/link-meta.ts | 8 | ||||
-rw-r--r-- | src/lib/strings.ts | 51 | ||||
-rw-r--r-- | src/view/com/posts/FeedItem.tsx | 8 |
5 files changed, 103 insertions, 55 deletions
diff --git a/src/lib/extractHtmlMeta.ts b/src/lib/extractHtmlMeta.ts new file mode 100644 index 000000000..2517262be --- /dev/null +++ b/src/lib/extractHtmlMeta.ts @@ -0,0 +1,65 @@ +import {extractYoutubeMeta} from './extractYoutubeMeta' + +interface ExtractHtmlMetaInput { + html: string + hostname?: string +} + +export const extractHtmlMeta = ({ + html, + hostname, +}: ExtractHtmlMetaInput): Record<string, string> => { + const htmlTitleRegex = /<title>([^<]+)<\/title>/i + + let res: Record<string, string> = {} + + const match = htmlTitleRegex.exec(html) + + if (match) { + res.title = match[1].trim() + } + + let metaMatch + let propMatch + const metaRe = /<meta[\s]([^>]+)>/gis + while ((metaMatch = metaRe.exec(html))) { + let propName + let propValue + const propRe = /(name|property|content)="([^"]+)"/gis + while ((propMatch = propRe.exec(metaMatch[1]))) { + if (propMatch[1] === 'content') { + propValue = propMatch[2] + } else { + propName = propMatch[2] + } + } + if (!propName || !propValue) { + continue + } + switch (propName?.trim()) { + case 'title': + case 'og:title': + case 'twitter:title': + res.title = propValue?.trim() + break + case 'description': + case 'og:description': + case 'twitter:description': + res.description = propValue?.trim() + break + case 'og:image': + case 'twitter:image': + res.image = propValue?.trim() + break + } + } + + const isYoutubeUrl = + hostname?.includes('youtube.') || hostname?.includes('youtu.be') + if (isYoutubeUrl) { + // Workaround for Youtube not having a title in the meta tags + res = {...res, ...extractYoutubeMeta(html)} + } + + return res +} diff --git a/src/lib/extractYoutubeMeta.ts b/src/lib/extractYoutubeMeta.ts new file mode 100644 index 000000000..566e3be46 --- /dev/null +++ b/src/lib/extractYoutubeMeta.ts @@ -0,0 +1,26 @@ +export const extractYoutubeMeta = (html: string): Record<string, string> => { + const res: Record<string, string> = {} + const youtubeTitleRegex = /"videoDetails":.*"title":"([^"]*)"/i + const 
youtubeDescriptionRegex = + /"videoDetails":.*"shortDescription":"([^"]*)"/i + const youtubeThumbnailRegex = /"videoDetails":.*"url":"(.*)(default\.jpg)/i + + const youtubeTitleMatch = youtubeTitleRegex.exec(html) + const youtubeDescriptionMatch = youtubeDescriptionRegex.exec(html) + const youtubeThumbnailMatch = youtubeThumbnailRegex.exec(html) + + if (youtubeTitleMatch && youtubeTitleMatch.length >= 1) { + res.title = decodeURI(youtubeTitleMatch[1]) + } + if (youtubeDescriptionMatch && youtubeDescriptionMatch.length >= 1) { + res.description = decodeURI(youtubeDescriptionMatch[1]).replace( + /\\n/g, + '\n', + ) + } + if (youtubeThumbnailMatch && youtubeThumbnailMatch.length >= 2) { + res.image = youtubeThumbnailMatch[1] + 'default.jpg' + } + + return res +} diff --git a/src/lib/link-meta.ts b/src/lib/link-meta.ts index 9a0325c8f..7e0964c17 100644 --- a/src/lib/link-meta.ts +++ b/src/lib/link-meta.ts @@ -1,7 +1,8 @@ import he from 'he' -import {extractHtmlMeta, isBskyAppUrl} from './strings' +import {isBskyAppUrl} from './strings' import {RootStoreModel} from '../state' import {extractBskyMeta} from './extractBskyMeta' +import {extractHtmlMeta} from './extractHtmlMeta' export enum LikelyType { HTML, @@ -59,7 +60,10 @@ export async function getLinkMeta( }) const httpResBody = await httpRes.text() clearTimeout(to) - const httpResMeta = extractHtmlMeta(httpResBody) + const httpResMeta = extractHtmlMeta({ + html: httpResBody, + hostname: urlp?.hostname, + }) meta.title = httpResMeta.title ? he.decode(httpResMeta.title) : undefined meta.description = httpResMeta.description ? 
he.decode(httpResMeta.description) diff --git a/src/lib/strings.ts b/src/lib/strings.ts index 04d8656f7..77fe222e4 100644 --- a/src/lib/strings.ts +++ b/src/lib/strings.ts @@ -265,54 +265,3 @@ export function convertBskyAppUrlIfNeeded(url: string): string { } return url } - -const htmlTitleRegex = /<title>([^<]+)<\/title>/i -export function extractHtmlMeta(html: string): Record<string, string> { - const res: Record<string, string> = {} - - { - const match = htmlTitleRegex.exec(html) - if (match) { - res.title = match[1].trim() - } - } - - { - let metaMatch - let propMatch - const metaRe = /<meta[\s]([^>]+)>/gis - while ((metaMatch = metaRe.exec(html))) { - let propName - let propValue - const propRe = /(name|property|content)="([^"]+)"/gis - while ((propMatch = propRe.exec(metaMatch[1]))) { - if (propMatch[1] === 'content') { - propValue = propMatch[2] - } else { - propName = propMatch[2] - } - } - if (!propName || !propValue) { - continue - } - switch (propName?.trim()) { - case 'title': - case 'og:title': - case 'twitter:title': - res.title = propValue?.trim() - break - case 'description': - case 'og:description': - case 'twitter:description': - res.description = propValue?.trim() - break - case 'og:image': - case 'twitter:image': - res.image = propValue?.trim() - break - } - } - } - - return res -} diff --git a/src/view/com/posts/FeedItem.tsx b/src/view/com/posts/FeedItem.tsx index 54823d844..4133c17d4 100644 --- a/src/view/com/posts/FeedItem.tsx +++ b/src/view/com/posts/FeedItem.tsx @@ -39,7 +39,9 @@ export const FeedItem = observer(function ({ const itemTitle = `Post by ${item.post.author.handle}` const authorHref = `/profile/${item.post.author.handle}` const replyAuthorDid = useMemo(() => { - if (!record?.reply) return '' + if (!record?.reply) { + return '' + } const urip = new AtUri(record.reply.parent?.uri || record.reply.root.uri) return urip.hostname }, [record?.reply]) @@ -196,7 +198,9 @@ export const FeedItem = observer(function ({ ) : ( <View 
style={{height: 5}} /> )} - <PostEmbeds embed={item.post.embed} style={styles.embed} /> + {item.post.embed ? ( + <PostEmbeds embed={item.post.embed} style={styles.embed} /> + ) : null} <PostCtrls style={styles.ctrls} itemHref={itemHref} |