diff options
Diffstat (limited to 'src/lib/moderatePost_wrapped.ts')
-rw-r--r-- | src/lib/moderatePost_wrapped.ts | 205 |
1 files changed, 204 insertions, 1 deletions
diff --git a/src/lib/moderatePost_wrapped.ts b/src/lib/moderatePost_wrapped.ts index 2195b2304..92543b42c 100644 --- a/src/lib/moderatePost_wrapped.ts +++ b/src/lib/moderatePost_wrapped.ts @@ -2,19 +2,151 @@ import { AppBskyEmbedRecord, AppBskyEmbedRecordWithMedia, moderatePost, + AppBskyActorDefs, + AppBskyFeedPost, + AppBskyRichtextFacet, + AppBskyEmbedImages, } from '@atproto/api' type ModeratePost = typeof moderatePost type Options = Parameters<ModeratePost>[1] & { hiddenPosts?: string[] + mutedWords?: AppBskyActorDefs.MutedWord[] +} + +const REGEX = { + LEADING_TRAILING_PUNCTUATION: /(?:^\p{P}+|\p{P}+$)/gu, + ESCAPE: /[[\]{}()*+?.\\^$|\s]/g, + SEPARATORS: /[\/\-\–\—\(\)\[\]\_]+/g, + WORD_BOUNDARY: /[\s\n\t\r\f\v]+?/g, +} + +/** + * List of 2-letter lang codes for languages that either don't use spaces, or + * don't use spaces in a way conducive to word-based filtering. + * + * For these, we use a simple `String.includes` to check for a match. + */ +const LANGUAGE_EXCEPTIONS = [ + 'ja', // Japanese + 'zh', // Chinese + 'ko', // Korean + 'th', // Thai + 'vi', // Vietnamese +] + +export function hasMutedWord({ + mutedWords, + text, + facets, + outlineTags, + languages, + isOwnPost, +}: { + mutedWords: AppBskyActorDefs.MutedWord[] + text: string + facets?: AppBskyRichtextFacet.Main[] + outlineTags?: string[] + languages?: string[] + isOwnPost: boolean +}) { + if (isOwnPost) return false + + const exception = LANGUAGE_EXCEPTIONS.includes(languages?.[0] || '') + const tags = ([] as string[]) + .concat(outlineTags || []) + .concat( + facets + ?.filter(facet => { + return facet.features.find(feature => + AppBskyRichtextFacet.isTag(feature), + ) + }) + .map(t => t.features[0].tag as string) || [], + ) + .map(t => t.toLowerCase()) + + for (const mute of mutedWords) { + const mutedWord = mute.value.toLowerCase() + const postText = text.toLowerCase() + + // `content` applies to tags as well + if (tags.includes(mutedWord)) return true + // rest of the checks are for `content` only + if (!mute.targets.includes('content')) continue + // single character or other exception, has to use includes + if ((mutedWord.length === 1 || exception) && postText.includes(mutedWord)) + return true + // too long + if (mutedWord.length > postText.length) continue + // exact match + if (mutedWord === postText) return true + // any muted phrase with space or punctuation + if (/(?:\s|\p{P})+?/u.test(mutedWord) && postText.includes(mutedWord)) + return true + + // check individual character groups + const words = postText.split(REGEX.WORD_BOUNDARY) + for (const word of words) { + if (word === mutedWord) return true + + // compare word without leading/trailing punctuation, but allow internal + // punctuation (such as `s@ssy`) + const wordTrimmedPunctuation = word.replace( + REGEX.LEADING_TRAILING_PUNCTUATION, + '', + ) + + if (mutedWord === wordTrimmedPunctuation) return true + if (mutedWord.length > wordTrimmedPunctuation.length) continue + + // handle hyphenated, slash separated words, etc + if (REGEX.SEPARATORS.test(wordTrimmedPunctuation)) { + // check against full normalized phrase + const wordNormalizedSeparators = wordTrimmedPunctuation.replace( + REGEX.SEPARATORS, + ' ', + ) + const mutedWordNormalizedSeparators = mutedWord.replace( + REGEX.SEPARATORS, + ' ', + ) + // hyphenated (or other sep) to spaced words + if (wordNormalizedSeparators === mutedWordNormalizedSeparators) + return true + + /* Disabled for now e.g. `super-cool` to `supercool` + const wordNormalizedCompressed = wordNormalizedSeparators.replace( + REGEX.WORD_BOUNDARY, + '', + ) + const mutedWordNormalizedCompressed = + mutedWordNormalizedSeparators.replace(/\s+?/g, '') + // hyphenated (or other sep) to non-hyphenated contiguous word + if (mutedWordNormalizedCompressed === wordNormalizedCompressed) + return true + */ + + // then individual parts of separated phrases/words + const wordParts = wordTrimmedPunctuation.split(REGEX.SEPARATORS) + for (const wp of wordParts) { + // still retain internal punctuation + if (wp === mutedWord) return true + } + } + } + } + + return false } export function moderatePost_wrapped( subject: Parameters<ModeratePost>[0], opts: Options, ) { - const {hiddenPosts = [], ...options} = opts + const {hiddenPosts = [], mutedWords = [], ...options} = opts const moderations = moderatePost(subject, options) + const isOwnPost = subject.author.did === opts.userDid if (hiddenPosts.includes(subject.uri)) { moderations.content.filter = true @@ -29,15 +161,86 @@ export function moderatePost_wrapped( } } + if (AppBskyFeedPost.isRecord(subject.record)) { + let muted = hasMutedWord({ + mutedWords, + text: subject.record.text, + facets: subject.record.facets || [], + outlineTags: subject.record.tags || [], + languages: subject.record.langs, + isOwnPost, + }) + + if ( + subject.record.embed && + AppBskyEmbedImages.isMain(subject.record.embed) + ) { + for (const image of subject.record.embed.images) { + muted = + muted || + hasMutedWord({ + mutedWords, + text: image.alt, + facets: [], + outlineTags: [], + languages: subject.record.langs, + isOwnPost, + }) + } + } + + if (muted) { + moderations.content.filter = true + moderations.content.blur = true + if (!moderations.content.cause) { + moderations.content.cause = { + // @ts-ignore Temporary extension to the moderation system -prf + type: 'muted-word', + source: {type: 'user'}, + priority: 1, + } + } + } + } + if (subject.embed) { let embedHidden = false if (AppBskyEmbedRecord.isViewRecord(subject.embed.record)) { embedHidden = hiddenPosts.includes(subject.embed.record.uri) + + if (AppBskyFeedPost.isRecord(subject.embed.record.value)) { + embedHidden = + embedHidden || + hasMutedWord({ + mutedWords, + text: subject.embed.record.value.text, + facets: subject.embed.record.value.facets, + outlineTags: subject.embed.record.value.tags, + languages: subject.embed.record.value.langs, + isOwnPost, + }) + + if (AppBskyEmbedImages.isMain(subject.embed.record.value.embed)) { + for (const image of subject.embed.record.value.embed.images) { + embedHidden = + embedHidden || + hasMutedWord({ + mutedWords, + text: image.alt, + facets: [], + outlineTags: [], + languages: subject.embed.record.value.langs, + isOwnPost, + }) + } + } + } } if ( AppBskyEmbedRecordWithMedia.isView(subject.embed) && AppBskyEmbedRecord.isViewRecord(subject.embed.record.record) ) { + // TODO what embedHidden = hiddenPosts.includes(subject.embed.record.record.uri) } if (embedHidden) { |