diff options
author | Paul Frazee <pfrazee@gmail.com> | 2024-01-30 20:16:49 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-30 20:16:49 -0800 |
commit | 31bf286fdc139e82ddc081c8e6c0962495e63fb3 (patch) | |
tree | f7b7f38365e5c827d754e53e0ece4b91ebbdae7a /src/view/com/composer/select-language | |
parent | 42fe0e160975a0f4984f3350edb5c86aeddf077c (diff) | |
download | voidsky-31bf286fdc139e82ddc081c8e6c0962495e63fb3.tar.zst |
Diffstat (limited to 'src/view/com/composer/select-language')
-rw-r--r-- | src/view/com/composer/select-language/SuggestedLanguage.tsx | 32 |
1 files changed, 23 insertions, 9 deletions
```diff
diff --git a/src/view/com/composer/select-language/SuggestedLanguage.tsx b/src/view/com/composer/select-language/SuggestedLanguage.tsx
index 987d89d36..0bf62ae0d 100644
--- a/src/view/com/composer/select-language/SuggestedLanguage.tsx
+++ b/src/view/com/composer/select-language/SuggestedLanguage.tsx
@@ -23,7 +23,9 @@ const onIdle = globalThis.requestIdleCallback || (cb => setTimeout(cb, 1))
 const cancelIdle = globalThis.cancelIdleCallback || clearTimeout
 
 export function SuggestedLanguage({text}: {text: string}) {
-  const [suggestedLanguage, setSuggestedLanguage] = useState<string>()
+  const [suggestedLanguage, setSuggestedLanguage] = useState<
+    string | undefined
+  >()
   const langPrefs = useLanguagePrefs()
   const setLangPrefs = useLanguagePrefsApi()
   const pal = usePalette('default')
@@ -40,14 +42,7 @@ export function SuggestedLanguage({text}: {text: string}) {
     }
 
     const idle = onIdle(() => {
-      // Only select languages that have a high confidence and convert to code2
-      const result = lande(textTrimmed).filter(
-        ([lang, value]) => value >= 0.97 && code3ToCode2Strict(lang),
-      )
-
-      setSuggestedLanguage(
-        result.length > 0 ? code3ToCode2Strict(result[0][0]) : undefined,
-      )
+      setSuggestedLanguage(guessLanguage(textTrimmed))
     })
 
     return () => cancelIdle(idle)
@@ -99,3 +94,22 @@ const styles = StyleSheet.create({
     marginBottom: 10,
   },
 })
+
+/**
+ * This function is using the lande language model to attempt to detect the language
+ * We want to only make suggestions when we feel a high degree of certainty
+ * The magic numbers are based on debugging sessions against some test strings
+ */
+function guessLanguage(text: string): string | undefined {
+  const scores = lande(text).filter(([_lang, value]) => value >= 0.0002)
+  // if the model has multiple items with a score higher than 0.0002, it isn't certain enough
+  if (scores.length !== 1) {
+    return undefined
+  }
+  const [lang, value] = scores[0]
+  // if the model doesn't give a score of 0.97 or above, it isn't certain enough
+  if (value < 0.97) {
+    return undefined
+  }
+  return code3ToCode2Strict(lang)
+}
```