about summary refs log tree commit diff
path: root/src/view/com/composer/select-language
diff options
context:
space:
mode:
author: Paul Frazee <pfrazee@gmail.com>, 2024-01-30 20:16:49 -0800
committer: GitHub <noreply@github.com>, 2024-01-30 20:16:49 -0800
commit: 31bf286fdc139e82ddc081c8e6c0962495e63fb3 (patch)
tree: f7b7f38365e5c827d754e53e0ece4b91ebbdae7a /src/view/com/composer/select-language
parent: 42fe0e160975a0f4984f3350edb5c86aeddf077c (diff)
download: voidsky-31bf286fdc139e82ddc081c8e6c0962495e63fb3.tar.zst
Reduce the odds of false positives in the language suggester (close #2598) (#2700)
Diffstat (limited to 'src/view/com/composer/select-language')
-rw-r--r-- src/view/com/composer/select-language/SuggestedLanguage.tsx | 32
1 file changed, 23 insertions, 9 deletions
diff --git a/src/view/com/composer/select-language/SuggestedLanguage.tsx b/src/view/com/composer/select-language/SuggestedLanguage.tsx
index 987d89d36..0bf62ae0d 100644
--- a/src/view/com/composer/select-language/SuggestedLanguage.tsx
+++ b/src/view/com/composer/select-language/SuggestedLanguage.tsx
@@ -23,7 +23,9 @@ const onIdle = globalThis.requestIdleCallback || (cb => setTimeout(cb, 1))
 const cancelIdle = globalThis.cancelIdleCallback || clearTimeout
 
 export function SuggestedLanguage({text}: {text: string}) {
-  const [suggestedLanguage, setSuggestedLanguage] = useState<string>()
+  const [suggestedLanguage, setSuggestedLanguage] = useState<
+    string | undefined
+  >()
   const langPrefs = useLanguagePrefs()
   const setLangPrefs = useLanguagePrefsApi()
   const pal = usePalette('default')
@@ -40,14 +42,7 @@ export function SuggestedLanguage({text}: {text: string}) {
     }
 
     const idle = onIdle(() => {
-      // Only select languages that have a high confidence and convert to code2
-      const result = lande(textTrimmed).filter(
-        ([lang, value]) => value >= 0.97 && code3ToCode2Strict(lang),
-      )
-
-      setSuggestedLanguage(
-        result.length > 0 ? code3ToCode2Strict(result[0][0]) : undefined,
-      )
+      setSuggestedLanguage(guessLanguage(textTrimmed))
     })
 
     return () => cancelIdle(idle)
@@ -99,3 +94,22 @@ const styles = StyleSheet.create({
     marginBottom: 10,
   },
 })
+
+/**
+ * Guesses the language of `text` using the lande model; returns undefined unless
+ * exactly one candidate scores >= 0.0002 and that candidate also scores >= 0.97.
+ * Both thresholds are magic numbers tuned in debugging sessions on test strings.
+ */
+function guessLanguage(text: string): string | undefined {
+  const scores = lande(text).filter(([_lang, value]) => value >= 0.0002)
+  // zero or several candidates at or above 0.0002 means the model isn't certain enough
+  if (scores.length !== 1) {
+    return undefined
+  }
+  const [lang, value] = scores[0]
+  // if the model doesn't give a score of 0.97 or above, it isn't certain enough
+  if (value < 0.97) {
+    return undefined
+  }
+  return code3ToCode2Strict(lang)
+}