diff options
Diffstat (limited to 'src/lib')
-rw-r--r-- | src/lib/extractHtmlMeta.ts | 65 | ||||
-rw-r--r-- | src/lib/extractYoutubeMeta.ts | 26 | ||||
-rw-r--r-- | src/lib/link-meta.ts | 8 | ||||
-rw-r--r-- | src/lib/strings.ts | 51 |
4 files changed, 97 insertions, 53 deletions
diff --git a/src/lib/extractHtmlMeta.ts b/src/lib/extractHtmlMeta.ts new file mode 100644 index 000000000..2517262be --- /dev/null +++ b/src/lib/extractHtmlMeta.ts @@ -0,0 +1,65 @@ +import {extractYoutubeMeta} from './extractYoutubeMeta' + +interface ExtractHtmlMetaInput { + html: string + hostname?: string +} + +export const extractHtmlMeta = ({ + html, + hostname, +}: ExtractHtmlMetaInput): Record<string, string> => { + const htmlTitleRegex = /<title>([^<]+)<\/title>/i + + let res: Record<string, string> = {} + + const match = htmlTitleRegex.exec(html) + + if (match) { + res.title = match[1].trim() + } + + let metaMatch + let propMatch + const metaRe = /<meta[\s]([^>]+)>/gis + while ((metaMatch = metaRe.exec(html))) { + let propName + let propValue + const propRe = /(name|property|content)="([^"]+)"/gis + while ((propMatch = propRe.exec(metaMatch[1]))) { + if (propMatch[1] === 'content') { + propValue = propMatch[2] + } else { + propName = propMatch[2] + } + } + if (!propName || !propValue) { + continue + } + switch (propName?.trim()) { + case 'title': + case 'og:title': + case 'twitter:title': + res.title = propValue?.trim() + break + case 'description': + case 'og:description': + case 'twitter:description': + res.description = propValue?.trim() + break + case 'og:image': + case 'twitter:image': + res.image = propValue?.trim() + break + } + } + + const isYoutubeUrl = + hostname?.includes('youtube.') || hostname?.includes('youtu.be') + if (isYoutubeUrl) { + // Workaround for Youtube not having a title in the meta tags + res = {...res, ...extractYoutubeMeta(html)} + } + + return res +} diff --git a/src/lib/extractYoutubeMeta.ts b/src/lib/extractYoutubeMeta.ts new file mode 100644 index 000000000..566e3be46 --- /dev/null +++ b/src/lib/extractYoutubeMeta.ts @@ -0,0 +1,26 @@ +export const extractYoutubeMeta = (html: string): Record<string, string> => { + const res: Record<string, string> = {} + const youtubeTitleRegex = /"videoDetails":.*"title":"([^"]*)"/i + const youtubeDescriptionRegex = + /"videoDetails":.*"shortDescription":"([^"]*)"/i + const youtubeThumbnailRegex = /"videoDetails":.*"url":"(.*)(default\.jpg)/i + + const youtubeTitleMatch = youtubeTitleRegex.exec(html) + const youtubeDescriptionMatch = youtubeDescriptionRegex.exec(html) + const youtubeThumbnailMatch = youtubeThumbnailRegex.exec(html) + + if (youtubeTitleMatch && youtubeTitleMatch.length >= 1) { + res.title = decodeURI(youtubeTitleMatch[1]) + } + if (youtubeDescriptionMatch && youtubeDescriptionMatch.length >= 1) { + res.description = decodeURI(youtubeDescriptionMatch[1]).replace( + /\\n/g, + '\n', + ) + } + if (youtubeThumbnailMatch && youtubeThumbnailMatch.length >= 2) { + res.image = youtubeThumbnailMatch[1] + 'default.jpg' + } + + return res +} diff --git a/src/lib/link-meta.ts b/src/lib/link-meta.ts index 9a0325c8f..7e0964c17 100644 --- a/src/lib/link-meta.ts +++ b/src/lib/link-meta.ts @@ -1,7 +1,8 @@ import he from 'he' -import {extractHtmlMeta, isBskyAppUrl} from './strings' +import {isBskyAppUrl} from './strings' import {RootStoreModel} from '../state' import {extractBskyMeta} from './extractBskyMeta' +import {extractHtmlMeta} from './extractHtmlMeta' export enum LikelyType { HTML, @@ -59,7 +60,10 @@ export async function getLinkMeta( }) const httpResBody = await httpRes.text() clearTimeout(to) - const httpResMeta = extractHtmlMeta(httpResBody) + const httpResMeta = extractHtmlMeta({ + html: httpResBody, + hostname: urlp?.hostname, + }) meta.title = httpResMeta.title ? he.decode(httpResMeta.title) : undefined meta.description = httpResMeta.description ? he.decode(httpResMeta.description) diff --git a/src/lib/strings.ts b/src/lib/strings.ts index 04d8656f7..77fe222e4 100644 --- a/src/lib/strings.ts +++ b/src/lib/strings.ts @@ -265,54 +265,3 @@ export function convertBskyAppUrlIfNeeded(url: string): string { } return url } - -const htmlTitleRegex = /<title>([^<]+)<\/title>/i -export function extractHtmlMeta(html: string): Record<string, string> { - const res: Record<string, string> = {} - - { - const match = htmlTitleRegex.exec(html) - if (match) { - res.title = match[1].trim() - } - } - - { - let metaMatch - let propMatch - const metaRe = /<meta[\s]([^>]+)>/gis - while ((metaMatch = metaRe.exec(html))) { - let propName - let propValue - const propRe = /(name|property|content)="([^"]+)"/gis - while ((propMatch = propRe.exec(metaMatch[1]))) { - if (propMatch[1] === 'content') { - propValue = propMatch[2] - } else { - propName = propMatch[2] - } - } - if (!propName || !propValue) { - continue - } - switch (propName?.trim()) { - case 'title': - case 'og:title': - case 'twitter:title': - res.title = propValue?.trim() - break - case 'description': - case 'og:description': - case 'twitter:description': - res.description = propValue?.trim() - break - case 'og:image': - case 'twitter:image': - res.image = propValue?.trim() - break - } - } - } - - return res -} |