From 22381791c84a3f09adcf303e7b7246eb3272d9f4 Mon Sep 17 00:00:00 2001 From: Namu Date: Fri, 22 May 2026 16:51:47 +0200 Subject: [PATCH] feat: Add language detection for all text --- main.py | 10 +++++++++ utils/__init__.py | 53 ++++++++++++++++++++++++++++++++++------------- 2 files changed, 49 insertions(+), 14 deletions(-) diff --git a/main.py b/main.py index cd93689..4154b5e 100644 --- a/main.py +++ b/main.py @@ -41,3 +41,13 @@ if __name__ == '__main__': print('Résultat backward ---------------------------------------------------') print(f'FR={proba_back_fr}, EN={proba_back_en}, IT={proba_back_it}, Conclusion={max(proba_back_fr, proba_back_en, proba_back_it)}') + + words_text_1 = data_preparation.prepare_data('texte_1.txt') + words_text_2 = data_preparation.prepare_data('texte_2.txt') + words_text_3 = data_preparation.prepare_data('texte_3.txt') + + text_1_result = utils.forward_detection_with_text(lambda_fr, lambda_en, lambda_it, words_text_1) + text_2_result = utils.forward_detection_with_text(lambda_fr, lambda_en, lambda_it, words_text_2) + text_3_result = utils.forward_detection_with_text(lambda_fr, lambda_en, lambda_it, words_text_3) + + print(f'texte 1 {text_1_result}, texte 2 {text_2_result}, texte 3 {text_3_result}') diff --git a/utils/__init__.py b/utils/__init__.py index e8a0178..d2f7cca 100644 --- a/utils/__init__.py +++ b/utils/__init__.py @@ -9,18 +9,18 @@ def normalize_probabilities(prob_fr: float, prob_en: float, prob_it: float, sear return searched / sum -def forward_detection(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: str): +def forward_detection(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: list[int]) -> str: """ Prints the language detected with forward method :param hmm_fr: lambda_fr :param hmm_en: lambda_en :param hmm_it: lambda_it :param O: mot à détecter - :return: + :return: Le langage détecté """ - res_fr = hmm_fr.forward(O) - res_en = hmm_en.forward(O) - res_it = hmm_it.forward(O) + res_fr, _ = hmm_fr.forward(O) + res_en, _ = hmm_en.forward(O) + res_it, _ = hmm_it.forward(O) proba_fr = normalize_probabilities(res_fr, res_en, res_it, res_fr) proba_en = normalize_probabilities(res_fr, res_en, res_it, res_en) @@ -28,21 +28,21 @@ def forward_detection(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: str): print(f'FR={proba_fr}, EN={proba_en}, IT={proba_it}') - probas = [proba_fr, proba_en, proba_it] + probas: list[float] = [proba_fr, proba_en, proba_it] max_prob = proba_fr language_index = 0 - for index in 1..len(probas): + for index in range(1, len(probas)): if max_prob < probas[index]: max_prob = probas[index] language_index = index if language_index == 0: - print('Français') + return 'Français' elif language_index == 1: - print('Anglais') + return 'Anglais' else: - print('Italien') + return 'Italien' def backward_detection(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: str): @@ -68,17 +68,42 @@ def backward_detection(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: str): max_prob = proba_fr language_index = 0 - for index in 1..len(probas): + for index in range(1,len(probas)): if max_prob < probas[index]: max_prob = probas[index] language_index = index if language_index == 0: - print('Français') + return 'Français' elif language_index == 1: - print('Anglais') + return 'Anglais' else: - print('Italien') + return 'Italien' +def forward_detection_with_text(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: list[list[int]]) -> dict: + """ + :param hmm_fr: lambda fr + :param hmm_en: lambda en + :param hmm_it: lambda it + :param O: Le texte en version index de l'alphabet + :return: Le langage détecté + """ + + # Count the language détection occurrences. The max count is the answer + french_count = 0 + english_count = 0 + italian_count = 0 + + for word in O: + lang = forward_detection(hmm_fr, hmm_en, hmm_it, word) + match lang: + case 'Français': + french_count += 1 + case 'Anglais': + english_count += 1 + case 'Italien': + italian_count += 1 + + return {'french': french_count, 'english': english_count, 'italian': italian_count}