fix: correct the text language detection

This commit is contained in:
Namu
2026-05-23 15:39:50 +02:00
parent 8979550349
commit 76121b8c68
2 changed files with 22 additions and 21 deletions

View File

@@ -50,4 +50,5 @@ if __name__ == '__main__':
text_2_result = utils.forward_detection_with_text(lambda_fr, lambda_en, lambda_it, words_text_2) text_2_result = utils.forward_detection_with_text(lambda_fr, lambda_en, lambda_it, words_text_2)
text_3_result = utils.forward_detection_with_text(lambda_fr, lambda_en, lambda_it, words_text_3) text_3_result = utils.forward_detection_with_text(lambda_fr, lambda_en, lambda_it, words_text_3)
print('Résultat sur les textes ----------------------------------------------')
print(f'texte 1 {text_1_result}, texte 2 {text_2_result}, texte 3 {text_3_result}') print(f'texte 1 {text_1_result}, texte 2 {text_2_result}, texte 3 {text_3_result}')

View File

@@ -1,5 +1,5 @@
""" """
Ce module contient des fonctions utilisataires Ce module contient des fonctions utilitaires
""" """
from HMM import HMM from HMM import HMM
@@ -9,14 +9,14 @@ def normalize_probabilities(prob_fr: float, prob_en: float, prob_it: float, sear
return searched / sum return searched / sum
def forward_detection(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: list[int]) -> str: def forward_detection(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: list[int]) -> tuple[str, float, list[float]]:
""" """
Prints the language detected with forward method Prints the language detected with forward method
:param hmm_fr: lambda_fr :param hmm_fr: lambda_fr
:param hmm_en: lambda_en :param hmm_en: lambda_en
:param hmm_it: lambda_it :param hmm_it: lambda_it
:param O: mot à détecter :param O: mot à détecter
:return: Le langage détecté :return: Le langage détecté et la probabilité
""" """
res_fr, _ = hmm_fr.forward(O) res_fr, _ = hmm_fr.forward(O)
res_en, _ = hmm_en.forward(O) res_en, _ = hmm_en.forward(O)
@@ -26,7 +26,7 @@ def forward_detection(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: list[int]) -> st
proba_en = normalize_probabilities(res_fr, res_en, res_it, res_en) proba_en = normalize_probabilities(res_fr, res_en, res_it, res_en)
proba_it = normalize_probabilities(res_fr, res_en, res_it, res_it) proba_it = normalize_probabilities(res_fr, res_en, res_it, res_it)
print(f'FR={proba_fr}, EN={proba_en}, IT={proba_it}') #print(f'FR={proba_fr}, EN={proba_en}, IT={proba_it}')
probas: list[float] = [proba_fr, proba_en, proba_it] probas: list[float] = [proba_fr, proba_en, proba_it]
@@ -38,21 +38,21 @@ def forward_detection(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: list[int]) -> st
language_index = index language_index = index
if language_index == 0: if language_index == 0:
return 'Français' return 'Français', max_prob, [proba_fr, proba_en, proba_it]
elif language_index == 1: elif language_index == 1:
return 'Anglais' return 'Anglais', max_prob, [proba_fr, proba_en, proba_it]
else: else:
return 'Italien' return 'Italien', max_prob, [proba_fr, proba_en, proba_it]
def backward_detection(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: str): def backward_detection(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: str) -> tuple[str, float]:
""" """
Prints the language detected with backward method Prints the language detected with backward method
:param hmm_fr: lambda_fr :param hmm_fr: lambda_fr
:param hmm_en: lambda_en :param hmm_en: lambda_en
:param hmm_it: lambda_it :param hmm_it: lambda_it
:param O: mot à détecter :param O: mot à détecter
:return: :return: Langage détecté et probabilité
""" """
res_fr = hmm_fr.backward(O) res_fr = hmm_fr.backward(O)
res_en = hmm_en.backward(O) res_en = hmm_en.backward(O)
@@ -74,11 +74,11 @@ def backward_detection(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: str):
language_index = index language_index = index
if language_index == 0: if language_index == 0:
return 'Français' return 'Français', max_prob
elif language_index == 1: elif language_index == 1:
return 'Anglais' return 'Anglais', max_prob
else: else:
return 'Italien' return 'Italien', max_prob
def forward_detection_with_text(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: list[list[int]]) -> dict: def forward_detection_with_text(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: list[list[int]]) -> dict:
@@ -91,19 +91,19 @@ def forward_detection_with_text(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: list[l
:return: Le langage détecté :return: Le langage détecté
""" """
# Count the language détection occurrences. The max count is the answer # Count the language détection occurrences
french_count = 0 french_prob_count = english_prob_count = italian_prob_count = 0
english_count = 0
italian_count = 0
for word in O: for word in O:
lang = forward_detection(hmm_fr, hmm_en, hmm_it, word) lang, _ = forward_detection(hmm_fr, hmm_en, hmm_it, word)
match lang: match lang:
case 'Français': case 'Français':
french_count += 1 french_prob_count += 1
case 'Anglais': case 'Anglais':
english_count += 1 english_prob_count += 1
case 'Italien': case 'Italien':
italian_count += 1 italian_prob_count += 1
return {'french': french_count, 'english': english_count, 'italian': italian_count} total_sum = french_prob_count + english_prob_count + italian_prob_count
return {'french': french_prob_count / total_sum, 'english': english_prob_count / total_sum, 'italian': italian_prob_count / total_sum}