about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--__tests__/string-utils.ts73
-rw-r--r--src/lib/strings.ts38
2 files changed, 103 insertions, 8 deletions
diff --git a/__tests__/string-utils.ts b/__tests__/string-utils.ts
index fc7a8f272..a1bd59fee 100644
--- a/__tests__/string-utils.ts
+++ b/__tests__/string-utils.ts
@@ -16,6 +16,7 @@ describe('extractEntities', () => {
     'not@right',
     '@handle.com!@#$chars',
     '@handle.com\n@handle.com',
+    'parenthetical (@handle.com)',
     'start https://middle.com end',
     'start https://middle.com/foo/bar end',
     'start https://middle.com/foo/bar?baz=bux end',
@@ -36,6 +37,12 @@ describe('extractEntities', () => {
     'website.com.jpg',
     'e.g./foo',
     'website.com.jpg/foo',
+    'Classic article https://socket3.wordpress.com/2018/02/03/designing-windows-95s-user-interface/',
+    'Classic article https://socket3.wordpress.com/2018/02/03/designing-windows-95s-user-interface/ ',
+    'https://foo.com https://bar.com/whatever https://baz.com',
+    'punctuation https://foo.com, https://bar.com/whatever; https://baz.com.',
+    'parenthentical (https://foo.com)',
+    'except for https://foo.com/thing_(cool)',
   ]
   interface Output {
     type: string
@@ -64,6 +71,7 @@ describe('extractEntities', () => {
       {type: 'mention', value: 'handle.com'},
       {type: 'mention', value: 'handle.com'},
     ],
+    [{type: 'mention', value: 'handle.com'}],
     [{type: 'link', value: 'https://middle.com'}],
     [{type: 'link', value: 'https://middle.com/foo/bar'}],
     [{type: 'link', value: 'https://middle.com/foo/bar?baz=bux'}],
@@ -90,6 +98,32 @@ describe('extractEntities', () => {
     [],
     [],
     [],
+    [
+      {
+        type: 'link',
+        value:
+          'https://socket3.wordpress.com/2018/02/03/designing-windows-95s-user-interface/',
+      },
+    ],
+    [
+      {
+        type: 'link',
+        value:
+          'https://socket3.wordpress.com/2018/02/03/designing-windows-95s-user-interface/',
+      },
+    ],
+    [
+      {type: 'link', value: 'https://foo.com'},
+      {type: 'link', value: 'https://bar.com/whatever'},
+      {type: 'link', value: 'https://baz.com'},
+    ],
+    [
+      {type: 'link', value: 'https://foo.com'},
+      {type: 'link', value: 'https://bar.com/whatever'},
+      {type: 'link', value: 'https://baz.com'},
+    ],
+    [{type: 'link', value: 'https://foo.com'}],
+    [{type: 'link', value: 'https://foo.com/thing_(cool)'}],
   ]
   it('correctly handles a set of text inputs', () => {
     for (let i = 0; i < inputs.length; i++) {
@@ -140,6 +174,7 @@ describe('detectLinkables', () => {
     'not@right',
     '@bad!@#$chars',
     '@newline1\n@newline2',
+    'parenthetical (@handle)',
     'start https://middle.com end',
     'start https://middle.com/foo/bar end',
     'start https://middle.com/foo/bar?baz=bux end',
@@ -161,6 +196,12 @@ describe('detectLinkables', () => {
     'website.com.jpg',
     'e.g./foo',
     'website.com.jpg/foo',
+    'Classic article https://socket3.wordpress.com/2018/02/03/designing-windows-95s-user-interface/',
+    'Classic article https://socket3.wordpress.com/2018/02/03/designing-windows-95s-user-interface/ ',
+    'https://foo.com https://bar.com/whatever https://baz.com',
+    'punctuation https://foo.com, https://bar.com/whatever; https://baz.com.',
+    'parenthentical (https://foo.com)',
+    'except for https://foo.com/thing_(cool)',
   ]
   const outputs = [
     ['no linkable'],
@@ -172,6 +213,7 @@ describe('detectLinkables', () => {
     ['not@right'],
     [{link: '@bad'}, '!@#$chars'],
     [{link: '@newline1'}, '\n', {link: '@newline2'}],
+    ['parenthetical (', {link: '@handle'}, ')'],
     ['start ', {link: 'https://middle.com'}, ' end'],
     ['start ', {link: 'https://middle.com/foo/bar'}, ' end'],
     ['start ', {link: 'https://middle.com/foo/bar?baz=bux'}, ' end'],
@@ -193,6 +235,37 @@ describe('detectLinkables', () => {
     ['website.com.jpg'],
     ['e.g./foo'],
     ['website.com.jpg/foo'],
+    [
+      'Classic article ',
+      {
+        link: 'https://socket3.wordpress.com/2018/02/03/designing-windows-95s-user-interface/',
+      },
+    ],
+    [
+      'Classic article ',
+      {
+        link: 'https://socket3.wordpress.com/2018/02/03/designing-windows-95s-user-interface/',
+      },
+      ' ',
+    ],
+    [
+      {link: 'https://foo.com'},
+      ' ',
+      {link: 'https://bar.com/whatever'},
+      ' ',
+      {link: 'https://baz.com'},
+    ],
+    [
+      'punctuation ',
+      {link: 'https://foo.com'},
+      ', ',
+      {link: 'https://bar.com/whatever'},
+      '; ',
+      {link: 'https://baz.com'},
+      '.',
+    ],
+    ['parenthentical (', {link: 'https://foo.com'}, ')'],
+    ['except for ', {link: 'https://foo.com/thing_(cool)'}],
   ]
   it('correctly handles a set of text inputs', () => {
     for (let i = 0; i < inputs.length; i++) {
diff --git a/src/lib/strings.ts b/src/lib/strings.ts
index fb9d15b29..66dd59708 100644
--- a/src/lib/strings.ts
+++ b/src/lib/strings.ts
@@ -74,7 +74,7 @@ export function extractEntities(
   let ents: Entity[] = []
   {
     // mentions
-    const re = /(^|\s)(@)([a-zA-Z0-9\.-]+)(\b)/dg
+    const re = /(^|\s|\()(@)([a-zA-Z0-9\.-]+)(\b)/dg
     while ((match = re.exec(text))) {
       if (knownHandles && !knownHandles.has(match[3])) {
         continue // not a known handle
@@ -94,7 +94,7 @@ export function extractEntities(
   {
     // links
     const re =
-      /(^|\s)((https?:\/\/[\S]+)|((?<domain>[a-z][a-z0-9]*(\.[a-z0-9]+)+)[\S]*))(\b)/dg
+      /(^|\s|\()((https?:\/\/[\S]+)|((?<domain>[a-z][a-z0-9]*(\.[a-z0-9]+)+)[\S]*))/dgm
     while ((match = re.exec(text))) {
       let value = match[2]
       if (!value.startsWith('http')) {
@@ -104,13 +104,25 @@ export function extractEntities(
         }
         value = `https://${value}`
       }
+      const index = {
+        start: match.indices[2][0], // skip the (^|\s)
+        end: match.indices[2][1],
+      }
+      {
+        // strip ending punctuation
+        if (/[.,;!?]$/.test(value)) {
+          value = value.slice(0, -1)
+          index.end--
+        }
+        if (/[)]$/.test(value) && !value.includes('(')) {
+          value = value.slice(0, -1)
+          index.end--
+        }
+      }
       ents.push({
         type: 'link',
         value,
-        index: {
-          start: match.indices[2][0], // skip the (^|\s)
-          end: match.indices[2][1],
-        },
+        index,
       })
     }
   }
@@ -123,7 +135,7 @@ interface DetectedLink {
 type DetectedLinkable = string | DetectedLink
 export function detectLinkables(text: string): DetectedLinkable[] {
   const re =
-    /((^|\s)@[a-z0-9\.-]*)|((^|\s)https?:\/\/[\S]+)|((^|\s)(?<domain>[a-z][a-z0-9]*(\.[a-z0-9]+)+)[\S]*)/gi
+    /((^|\s|\()@[a-z0-9\.-]*)|((^|\s|\()https?:\/\/[\S]+)|((^|\s|\()(?<domain>[a-z][a-z0-9]*(\.[a-z0-9]+)+)[\S]*)/gi
   const segments = []
   let match
   let start = 0
@@ -135,7 +147,7 @@ export function detectLinkables(text: string): DetectedLinkable[] {
       continue
     }
 
-    if (/\s/.test(matchValue)) {
+    if (/\s|\(/.test(matchValue)) {
       // HACK
       // skip the starting space
       // we have to do this because RN doesnt support negative lookaheads
@@ -144,6 +156,16 @@ export function detectLinkables(text: string): DetectedLinkable[] {
       matchValue = matchValue.slice(1)
     }
 
+    {
+      // strip ending punctuation
+      if (/[.,;!?]$/.test(matchValue)) {
+        matchValue = matchValue.slice(0, -1)
+      }
+      if (/[)]$/.test(matchValue) && !matchValue.includes('(')) {
+        matchValue = matchValue.slice(0, -1)
+      }
+    }
+
     if (start !== matchIndex) {
       segments.push(text.slice(start, matchIndex))
     }