diff options
-rw-r--r-- | src/lib/__tests__/moderatePost_wrapped.test.ts | 481 | ||||
-rw-r--r-- | src/lib/moderatePost_wrapped.ts | 80 |
2 files changed, 313 insertions, 248 deletions
diff --git a/src/lib/__tests__/moderatePost_wrapped.test.ts b/src/lib/__tests__/moderatePost_wrapped.test.ts index 1d907963f..c35c1ef77 100644 --- a/src/lib/__tests__/moderatePost_wrapped.test.ts +++ b/src/lib/__tests__/moderatePost_wrapped.test.ts @@ -11,12 +11,12 @@ describe(`hasMutedWord`, () => { }) rt.detectFacetsWithoutResolution() - const match = hasMutedWord( - [{value: 'outlineTag', targets: ['tag']}], - rt.text, - rt.facets, - ['outlineTag'], - ) + const match = hasMutedWord({ + mutedWords: [{value: 'outlineTag', targets: ['tag']}], + text: rt.text, + facets: rt.facets, + outlineTags: ['outlineTag'], + }) expect(match).toBe(true) }) @@ -27,12 +27,12 @@ describe(`hasMutedWord`, () => { }) rt.detectFacetsWithoutResolution() - const match = hasMutedWord( - [{value: 'inlineTag', targets: ['tag']}], - rt.text, - rt.facets, - ['outlineTag'], - ) + const match = hasMutedWord({ + mutedWords: [{value: 'inlineTag', targets: ['tag']}], + text: rt.text, + facets: rt.facets, + outlineTags: ['outlineTag'], + }) expect(match).toBe(true) }) @@ -43,12 +43,12 @@ describe(`hasMutedWord`, () => { }) rt.detectFacetsWithoutResolution() - const match = hasMutedWord( - [{value: 'inlineTag', targets: ['content']}], - rt.text, - rt.facets, - ['outlineTag'], - ) + const match = hasMutedWord({ + mutedWords: [{value: 'inlineTag', targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: ['outlineTag'], + }) expect(match).toBe(true) }) @@ -59,12 +59,12 @@ describe(`hasMutedWord`, () => { }) rt.detectFacetsWithoutResolution() - const match = hasMutedWord( - [{value: 'inlineTag', targets: ['tag']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: 'inlineTag', targets: ['tag']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) expect(match).toBe(false) }) @@ -80,12 +80,12 @@ describe(`hasMutedWord`, () => { }) rt.detectFacetsWithoutResolution() - const match = hasMutedWord( - [{value: '希', targets: ['content']}], - 
rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: '希', targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) expect(match).toBe(true) }) @@ -96,12 +96,12 @@ describe(`hasMutedWord`, () => { }) rt.detectFacetsWithoutResolution() - const match = hasMutedWord( - [{value: 'politics', targets: ['content']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: 'politics', targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) expect(match).toBe(false) }) @@ -112,12 +112,12 @@ describe(`hasMutedWord`, () => { }) rt.detectFacetsWithoutResolution() - const match = hasMutedWord( - [{value: 'javascript', targets: ['content']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: 'javascript', targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) expect(match).toBe(true) }) @@ -130,12 +130,12 @@ describe(`hasMutedWord`, () => { }) rt.detectFacetsWithoutResolution() - const match = hasMutedWord( - [{value: 'javascript', targets: ['content']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: 'javascript', targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) expect(match).toBe(true) }) @@ -146,12 +146,12 @@ describe(`hasMutedWord`, () => { }) rt.detectFacetsWithoutResolution() - const match = hasMutedWord( - [{value: 'ai', targets: ['content']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: 'ai', targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) expect(match).toBe(false) }) @@ -162,12 +162,12 @@ describe(`hasMutedWord`, () => { }) rt.detectFacetsWithoutResolution() - const match = hasMutedWord( - [{value: 'brain', targets: ['content']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: 
'brain', targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) expect(match).toBe(true) }) @@ -178,12 +178,12 @@ describe(`hasMutedWord`, () => { }) rt.detectFacetsWithoutResolution() - const match = hasMutedWord( - [{value: `:)`, targets: ['content']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: `:)`, targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) expect(match).toBe(true) }) @@ -197,23 +197,23 @@ describe(`hasMutedWord`, () => { rt.detectFacetsWithoutResolution() it(`match: yay!`, () => { - const match = hasMutedWord( - [{value: 'yay!', targets: ['content']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: 'yay!', targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) expect(match).toBe(true) }) it(`match: yay`, () => { - const match = hasMutedWord( - [{value: 'yay', targets: ['content']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: 'yay', targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) expect(match).toBe(true) }) @@ -226,24 +226,24 @@ describe(`hasMutedWord`, () => { rt.detectFacetsWithoutResolution() it(`match: y!ppee`, () => { - const match = hasMutedWord( - [{value: 'y!ppee', targets: ['content']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: 'y!ppee', targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) expect(match).toBe(true) }) // single exclamation point, source has double it(`no match: y!ppee!`, () => { - const match = hasMutedWord( - [{value: 'y!ppee!', targets: ['content']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: 'y!ppee!', targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) expect(match).toBe(true) }) @@ -256,23 +256,23 @@ 
describe(`hasMutedWord`, () => { rt.detectFacetsWithoutResolution() it(`match: S@assy`, () => { - const match = hasMutedWord( - [{value: 'S@assy', targets: ['content']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: 'S@assy', targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) expect(match).toBe(true) }) it(`match: s@assy`, () => { - const match = hasMutedWord( - [{value: 's@assy', targets: ['content']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: 's@assy', targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) expect(match).toBe(true) }) @@ -286,12 +286,12 @@ describe(`hasMutedWord`, () => { // case insensitive it(`match: new york times`, () => { - const match = hasMutedWord( - [{value: 'new york times', targets: ['content']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: 'new york times', targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) expect(match).toBe(true) }) @@ -304,23 +304,23 @@ describe(`hasMutedWord`, () => { rt.detectFacetsWithoutResolution() it(`match: !command`, () => { - const match = hasMutedWord( - [{value: `!command`, targets: ['content']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: `!command`, targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) expect(match).toBe(true) }) it(`match: command`, () => { - const match = hasMutedWord( - [{value: `command`, targets: ['content']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: `command`, targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) expect(match).toBe(true) }) @@ -331,12 +331,12 @@ describe(`hasMutedWord`, () => { }) rt.detectFacetsWithoutResolution() - const match = hasMutedWord( - [{value: `!command`, targets: 
['content']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: `!command`, targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) expect(match).toBe(false) }) @@ -349,23 +349,23 @@ describe(`hasMutedWord`, () => { rt.detectFacetsWithoutResolution() it(`match: e/acc`, () => { - const match = hasMutedWord( - [{value: `e/acc`, targets: ['content']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: `e/acc`, targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) expect(match).toBe(true) }) it(`match: acc`, () => { - const match = hasMutedWord( - [{value: `acc`, targets: ['content']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: `acc`, targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) expect(match).toBe(true) }) @@ -378,45 +378,45 @@ describe(`hasMutedWord`, () => { rt.detectFacetsWithoutResolution() it(`match: super-bad`, () => { - const match = hasMutedWord( - [{value: `super-bad`, targets: ['content']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: `super-bad`, targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) expect(match).toBe(true) }) it(`match: super`, () => { - const match = hasMutedWord( - [{value: `super`, targets: ['content']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: `super`, targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) expect(match).toBe(true) }) it(`match: super bad`, () => { - const match = hasMutedWord( - [{value: `super bad`, targets: ['content']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: `super bad`, targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) expect(match).toBe(true) }) it(`match: 
superbad`, () => { - const match = hasMutedWord( - [{value: `superbad`, targets: ['content']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: `superbad`, targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) expect(match).toBe(false) }) @@ -429,47 +429,49 @@ describe(`hasMutedWord`, () => { rt.detectFacetsWithoutResolution() it(`match: idk what this would be`, () => { - const match = hasMutedWord( - [{value: `idk what this would be`, targets: ['content']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: `idk what this would be`, targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) expect(match).toBe(true) }) it(`no match: idk what this would be for`, () => { // extra word - const match = hasMutedWord( - [{value: `idk what this would be for`, targets: ['content']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [ + {value: `idk what this would be for`, targets: ['content']}, + ], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) expect(match).toBe(false) }) it(`match: idk`, () => { // extra word - const match = hasMutedWord( - [{value: `idk`, targets: ['content']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: `idk`, targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) expect(match).toBe(true) }) it(`match: idkwhatthiswouldbe`, () => { - const match = hasMutedWord( - [{value: `idkwhatthiswouldbe`, targets: ['content']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: `idkwhatthiswouldbe`, targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) expect(match).toBe(false) }) @@ -482,45 +484,45 @@ describe(`hasMutedWord`, () => { rt.detectFacetsWithoutResolution() it(`match: context(iykyk)`, () => { - const match = hasMutedWord( - 
[{value: `context(iykyk)`, targets: ['content']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: `context(iykyk)`, targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) expect(match).toBe(true) }) it(`match: context`, () => { - const match = hasMutedWord( - [{value: `context`, targets: ['content']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: `context`, targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) expect(match).toBe(true) }) it(`match: iykyk`, () => { - const match = hasMutedWord( - [{value: `iykyk`, targets: ['content']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: `iykyk`, targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) expect(match).toBe(true) }) it(`match: (iykyk)`, () => { - const match = hasMutedWord( - [{value: `(iykyk)`, targets: ['content']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: `(iykyk)`, targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) expect(match).toBe(true) }) @@ -533,12 +535,12 @@ describe(`hasMutedWord`, () => { rt.detectFacetsWithoutResolution() it(`match: 🦋`, () => { - const match = hasMutedWord( - [{value: `🦋`, targets: ['content']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: `🦋`, targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) expect(match).toBe(true) }) @@ -553,23 +555,46 @@ describe(`hasMutedWord`, () => { rt.detectFacetsWithoutResolution() it(`match: stop worrying`, () => { - const match = hasMutedWord( - [{value: 'stop worrying', targets: ['content']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: 'stop worrying', targets: ['content']}], + text: rt.text, + facets: rt.facets, + 
outlineTags: [], + }) expect(match).toBe(true) }) it(`match: turtles, or how`, () => { - const match = hasMutedWord( - [{value: 'turtles, or how', targets: ['content']}], - rt.text, - rt.facets, - [], - ) + const match = hasMutedWord({ + mutedWords: [{value: 'turtles, or how', targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + }) + + expect(match).toBe(true) + }) + }) + }) + + describe(`languages without spaces`, () => { + // I love turtles, or how I learned to stop worrying and love the internet + describe(`私はカメが好きです、またはどのようにして心配するのをやめてインターネットを愛するようになったのか`, () => { + const rt = new RichText({ + text: `私はカメが好きです、またはどのようにして心配するのをやめてインターネットを愛するようになったのか`, + }) + rt.detectFacetsWithoutResolution() + + // internet + it(`match: インターネット`, () => { + const match = hasMutedWord({ + mutedWords: [{value: 'インターネット', targets: ['content']}], + text: rt.text, + facets: rt.facets, + outlineTags: [], + languages: ['ja'], + }) expect(match).toBe(true) }) diff --git a/src/lib/moderatePost_wrapped.ts b/src/lib/moderatePost_wrapped.ts index 862f2de6f..428dbabf4 100644 --- a/src/lib/moderatePost_wrapped.ts +++ b/src/lib/moderatePost_wrapped.ts @@ -21,12 +21,34 @@ const REGEX = { WORD_BOUNDARY: /[\s\n\t\r\f\v]+?/g, } -export function hasMutedWord( - mutedWords: AppBskyActorDefs.MutedWord[], - text: string, - facets?: AppBskyRichtextFacet.Main[], - outlineTags?: string[], -) { +/** + * List of 2-letter lang codes for languages that either don't use spaces, or + * don't use spaces in a way conducive to word-based filtering. + * + * For these, we use a simple `String.includes` to check for a match. 
+ */ +const LANGUAGE_EXCEPTIONS = [ + 'ja', // Japanese + 'zh', // Chinese + 'ko', // Korean + 'th', // Thai + 'vi', // Vietnamese +] + +export function hasMutedWord({ + mutedWords, + text, + facets, + outlineTags, + languages, +}: { + mutedWords: AppBskyActorDefs.MutedWord[] + text: string + facets?: AppBskyRichtextFacet.Main[] + outlineTags?: string[] + languages?: string[] +}) { + const exception = LANGUAGE_EXCEPTIONS.includes(languages?.[0] || '') const tags = ([] as string[]) .concat(outlineTags || []) .concat( @@ -48,8 +70,9 @@ export function hasMutedWord( if (tags.includes(mutedWord)) return true // rest of the checks are for `content` only if (!mute.targets.includes('content')) continue - // single character, has to use includes - if (mutedWord.length === 1 && postText.includes(mutedWord)) return true + // single character or other exception, has to use includes + if ((mutedWord.length === 1 || exception) && postText.includes(mutedWord)) + return true // too long if (mutedWord.length > postText.length) continue // exact match @@ -134,19 +157,28 @@ export function moderatePost_wrapped( } if (AppBskyFeedPost.isRecord(subject.record)) { - let muted = hasMutedWord( + let muted = hasMutedWord({ mutedWords, - subject.record.text, - subject.record.facets || [], - subject.record.tags || [], - ) + text: subject.record.text, + facets: subject.record.facets || [], + outlineTags: subject.record.tags || [], + languages: subject.record.langs, + }) if ( subject.record.embed && AppBskyEmbedImages.isMain(subject.record.embed) ) { for (const image of subject.record.embed.images) { - muted = muted || hasMutedWord(mutedWords, image.alt, [], []) + muted = + muted || + hasMutedWord({ + mutedWords, + text: image.alt, + facets: [], + outlineTags: [], + languages: subject.record.langs, + }) } } @@ -172,17 +204,25 @@ export function moderatePost_wrapped( if (AppBskyFeedPost.isRecord(subject.embed.record.value)) { embedHidden = embedHidden || - hasMutedWord( + hasMutedWord({ 
mutedWords, - subject.embed.record.value.text, - subject.embed.record.value.facets, - subject.embed.record.value.tags, - ) + text: subject.embed.record.value.text, + facets: subject.embed.record.value.facets, + outlineTags: subject.embed.record.value.tags, + languages: subject.embed.record.value.langs, + }) if (AppBskyEmbedImages.isMain(subject.embed.record.value.embed)) { for (const image of subject.embed.record.value.embed.images) { embedHidden = - embedHidden || hasMutedWord(mutedWords, image.alt, [], []) + embedHidden || + hasMutedWord({ + mutedWords, + text: image.alt, + facets: [], + outlineTags: [], + languages: subject.embed.record.value.langs, + }) } } } |