From 76121b8c683e78958203d5d149f6b47960890491 Mon Sep 17 00:00:00 2001
From: Namu
Date: Sat, 23 May 2026 15:39:50 +0200
Subject: [PATCH] fix: correct the text language detection

---
 main.py           |  1 +
 utils/__init__.py | 42 +++++++++++++++++++++---------------------
 2 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/main.py b/main.py
index 4154b5e..92802e1 100644
--- a/main.py
+++ b/main.py
@@ -50,4 +50,5 @@ if __name__ == '__main__':
     text_2_result = utils.forward_detection_with_text(lambda_fr, lambda_en, lambda_it, words_text_2)
     text_3_result = utils.forward_detection_with_text(lambda_fr, lambda_en, lambda_it, words_text_3)
 
+    print('Résultat sur les textes ----------------------------------------------')
     print(f'texte 1 {text_1_result}, texte 2 {text_2_result}, texte 3 {text_3_result}')
diff --git a/utils/__init__.py b/utils/__init__.py
index d2f7cca..b6ce085 100644
--- a/utils/__init__.py
+++ b/utils/__init__.py
@@ -1,5 +1,5 @@
 """
-Ce module contient des fonctions utilisataires
+This module contains utility functions
 """
 
 from HMM import HMM
@@ -9,14 +9,14 @@ def normalize_probabilities(prob_fr: float, prob_en: float, prob_it: float, sear
     return searched / sum
 
 
-def forward_detection(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: list[int]) -> str:
+def forward_detection(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: list[int]) -> tuple[str, float, list[float]]:
     """
     Prints the language detected with forward method
     :param hmm_fr: lambda_fr
     :param hmm_en: lambda_en
     :param hmm_it: lambda_it
     :param O: mot à détecter
-    :return: Le langage détecté
+    :return: The detected language, max_prob, and the [FR, EN, IT] probability list
     """
     res_fr, _ = hmm_fr.forward(O)
     res_en, _ = hmm_en.forward(O)
@@ -26,7 +26,7 @@ def forward_detection(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: list[int]) -> st
     proba_en = normalize_probabilities(res_fr, res_en, res_it, res_en)
     proba_it = normalize_probabilities(res_fr, res_en, res_it, res_it)
 
-    print(f'FR={proba_fr}, EN={proba_en}, IT={proba_it}')
+    #print(f'FR={proba_fr}, EN={proba_en}, IT={proba_it}')
 
     probas: list[float] = [proba_fr, proba_en, proba_it]
 
@@ -38,21 +38,21 @@ def forward_detection(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: list[int]) -> st
             language_index = index
 
     if language_index == 0:
-        return 'Français'
+        return 'Français', max_prob, [proba_fr, proba_en, proba_it]
     elif language_index == 1:
-        return 'Anglais'
+        return 'Anglais', max_prob, [proba_fr, proba_en, proba_it]
     else:
-        return 'Italien'
+        return 'Italien', max_prob, [proba_fr, proba_en, proba_it]
 
 
-def backward_detection(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: str):
+def backward_detection(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: str) -> tuple[str, float]:
     """
     Prints the language detected with backward method
     :param hmm_fr: lambda_fr
     :param hmm_en: lambda_en
     :param hmm_it: lambda_it
     :param O: mot à détecter
-    :return:
+    :return: The detected language and max_prob
     """
     res_fr = hmm_fr.backward(O)
     res_en = hmm_en.backward(O)
@@ -74,11 +74,11 @@ def backward_detection(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: str):
             language_index = index
 
     if language_index == 0:
-        return 'Français'
+        return 'Français', max_prob
     elif language_index == 1:
-        return 'Anglais'
+        return 'Anglais', max_prob
     else:
-        return 'Italien'
+        return 'Italien', max_prob
 
 
 def forward_detection_with_text(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: list[list[int]]) -> dict:
@@ -91,19 +91,19 @@ def forward_detection_with_text(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: list[l
     :return: Le langage détecté
     """
 
-    # Count the language détection occurrences. The max count is the answer
-    french_count = 0
-    english_count = 0
-    italian_count = 0
+    # Count how many words are detected as each language
+    french_prob_count = english_prob_count = italian_prob_count = 0
 
     for word in O:
-        lang = forward_detection(hmm_fr, hmm_en, hmm_it, word)
+        lang, *_ = forward_detection(hmm_fr, hmm_en, hmm_it, word)
         match lang:
             case 'Français':
-                french_count += 1
+                french_prob_count += 1
             case 'Anglais':
-                english_count += 1
+                english_prob_count += 1
             case 'Italien':
-                italian_count += 1
+                italian_prob_count += 1
 
-    return {'french': french_count, 'english': english_count, 'italian': italian_count}
+    total_sum = (french_prob_count + english_prob_count + italian_prob_count) or 1  # avoid ZeroDivisionError on an empty text
+
+    return {'french': french_prob_count / total_sum, 'english': english_prob_count / total_sum, 'italian': italian_prob_count / total_sum}