/*
 * Provenance (from the patch this file was introduced in):
 *   commit f10a8308d9f6bfb907c8a2458cbf78b4cfad88d2
 *   Author: Aryan Goharzad
 *   Date:   Thu, 19 Jan 2023 13:53:11 -0500
 *   Subject: Fixes youtube embed issues (#50)
 *     - fixes youtube embed
 *     - move extractMetaHtml test to its own file
 *     - tests cleanup
 *     - Add fallback for youtube meta data
 *     - lint
 *     - Check for youtube in the url domain
 *     - use hostname instead of full url to check for link domain
 *     - checks only for domain
 *   File: src/lib/extractHtmlMeta.ts (new file)
 */
import {extractYoutubeMeta} from './extractYoutubeMeta'

interface ExtractHtmlMetaInput {
  /** Raw HTML document text to scan. */
  html: string
  /** Hostname of the page the HTML came from; used only to detect YouTube. */
  hostname?: string
}

// First <title> element (attributes on the tag are tolerated).
const TITLE_RE = /<title(?:\s[^>]*)?>([^<]+)<\/title>/i

// Every <meta ...> tag; group 1 is the raw attribute text.
const META_TAG_RE = /<meta\s([^>]+)>/gi

// name= / property= / content= attributes with a double- or single-quoted,
// non-empty value. Group 1 is the attribute name, group 2 or 3 the value.
const META_ATTR_RE =
  /(name|property|content)\s*=\s*(?:"([^"]+)"|'([^']+)')/gi

/**
 * Extracts link-preview metadata (`title`, `description`, `image`) from an
 * HTML document using regular expressions (no DOM parsing).
 *
 * Resolution order:
 *  1. `title` is seeded from the `<title>` element, if present.
 *  2. Each `<meta>` tag is inspected in document order. Tags whose
 *     `name`/`property` is one of the recognised keys overwrite the
 *     corresponding field, so the last matching tag wins.
 *  3. When `hostname` contains `youtube.` or `youtu.be`, the result of
 *     `extractYoutubeMeta(html)` is spread over the fields gathered so far.
 *
 * Attribute names and meta keys are matched case-insensitively, and both
 * double- and single-quoted attribute values are accepted.
 *
 * @param input.html - HTML text to scan.
 * @param input.hostname - Optional hostname of the source page.
 * @returns An object containing whichever of `title`, `description` and
 *   `image` were found (plus anything `extractYoutubeMeta` contributes).
 */
export const extractHtmlMeta = ({
  html,
  hostname,
}: ExtractHtmlMetaInput): Record<string, string> => {
  let res: Record<string, string> = {}

  const titleText = TITLE_RE.exec(html)?.[1]
  if (titleText !== undefined) {
    res.title = titleText.trim()
  }

  // matchAll works on a clone of the regex, so the shared module-level
  // patterns never carry `lastIndex` state between calls.
  for (const metaMatch of html.matchAll(META_TAG_RE)) {
    let propName: string | undefined
    let propValue: string | undefined

    for (const attrMatch of (metaMatch[1] ?? '').matchAll(META_ATTR_RE)) {
      // The pattern is case-insensitive, so normalise before comparing;
      // otherwise `CONTENT="..."` would be mistaken for the key attribute.
      const attrName = attrMatch[1]?.toLowerCase()
      const attrValue = attrMatch[2] ?? attrMatch[3]
      if (attrName === 'content') {
        propValue = attrValue
      } else {
        propName = attrValue
      }
    }

    if (!propName || !propValue) {
      continue
    }

    switch (propName.trim().toLowerCase()) {
      case 'title':
      case 'og:title':
      case 'twitter:title':
        res.title = propValue.trim()
        break
      case 'description':
      case 'og:description':
      case 'twitter:description':
        res.description = propValue.trim()
        break
      case 'og:image':
      case 'twitter:image':
        res.image = propValue.trim()
        break
    }
  }

  const isYoutubeUrl =
    hostname?.includes('youtube.') || hostname?.includes('youtu.be')
  if (isYoutubeUrl) {
    // Workaround for Youtube not having a title in the meta tags
    res = {...res, ...extractYoutubeMeta(html)}
  }

  return res
}