diff options
author | Paul Frazee <pfrazee@gmail.com> | 2022-11-28 10:22:08 -0600 |
---|---|---|
committer | Paul Frazee <pfrazee@gmail.com> | 2022-11-28 10:22:08 -0600 |
commit | bcb1ad98de0ab2d2b184e030d8a7bcbb1e4e288b (patch) | |
tree | a19c7c054ce78d9a44169cbc68170faba216b67c /src/lib/strings.ts | |
parent | 8723b51693f824fbd62218ab0789a76b34315904 (diff) | |
download | voidsky-bcb1ad98de0ab2d2b184e030d8a7bcbb1e4e288b.tar.zst |
Fix a couple incorrect link detections ('e.g.' and 'foo.jpg') (close #13)
Diffstat (limited to 'src/lib/strings.ts')
-rw-r--r-- | src/lib/strings.ts | 21 |
1 files changed, 19 insertions, 2 deletions
diff --git a/src/lib/strings.ts b/src/lib/strings.ts
index 032eec566..fb9d15b29 100644
--- a/src/lib/strings.ts
+++ b/src/lib/strings.ts
@@ -1,6 +1,7 @@
 import {AtUri} from '../third-party/uri'
 import {Entity} from '../third-party/api/src/client/types/app/bsky/feed/post'
 import {PROD_SERVICE} from '../state'
+import TLDs from 'tlds'
 
 export const MAX_DISPLAY_NAME = 64
 export const MAX_DESCRIPTION = 256
@@ -57,6 +58,14 @@ export function ago(date: number | string | Date): string {
   }
 }
 
+export function isValidDomain(str: string): boolean {
+  return !!TLDs.find(tld => {
+    let i = str.lastIndexOf(tld)
+    if (i === -1) return false
+    return str.charAt(i - 1) === '.' && i === str.length - tld.length
+  })
+}
+
 export function extractEntities(
   text: string,
   knownHandles?: Set<string>,
@@ -85,10 +94,14 @@ export function extractEntities(
   {
     // links
     const re =
-      /(^|\s)((https?:\/\/[\S]+)|([a-z][a-z0-9]*(\.[a-z0-9]+)+[\S]*))(\b)/dg
+      /(^|\s)((https?:\/\/[\S]+)|((?<domain>[a-z][a-z0-9]*(\.[a-z0-9]+)+)[\S]*))(\b)/dg
     while ((match = re.exec(text))) {
       let value = match[2]
       if (!value.startsWith('http')) {
+        const domain = match.groups?.domain
+        if (!domain || !isValidDomain(domain)) {
+          continue
+        }
         value = `https://${value}`
       }
       ents.push({
@@ -110,7 +123,7 @@ interface DetectedLink {
 type DetectedLinkable = string | DetectedLink
 export function detectLinkables(text: string): DetectedLinkable[] {
   const re =
-    /((^|\s)@[a-z0-9\.-]*)|((^|\s)https?:\/\/[\S]+)|((^|\s)[a-z][a-z0-9]*(\.[a-z0-9]+)+[\S]*)/gi
+    /((^|\s)@[a-z0-9\.-]*)|((^|\s)https?:\/\/[\S]+)|((^|\s)(?<domain>[a-z][a-z0-9]*(\.[a-z0-9]+)+)[\S]*)/gi
   const segments = []
   let match
   let start = 0
@@ -118,6 +131,10 @@ export function detectLinkables(text: string): DetectedLinkable[] {
     let matchIndex = match.index
     let matchValue = match[0]
 
+    if (match.groups?.domain && !isValidDomain(match.groups?.domain)) {
+      continue
+    }
+
     if (/\s/.test(matchValue)) {
       // HACK
       // skip the starting space