feat: Add language detection for all text

This commit is contained in:
Namu
2026-05-22 16:51:47 +02:00
parent a794e12636
commit 22381791c8
2 changed files with 49 additions and 14 deletions

10
main.py
View File

@@ -41,3 +41,13 @@ if __name__ == '__main__':
print('Résultat backward ---------------------------------------------------')
print(f'FR={proba_back_fr}, EN={proba_back_en}, IT={proba_back_it}, Conclusion={max(proba_back_fr, proba_back_en, proba_back_it)}')
words_text_1 = data_preparation.prepare_data('texte_1.txt')
words_text_2 = data_preparation.prepare_data('texte_2.txt')
words_text_3 = data_preparation.prepare_data('texte_3.txt')
text_1_result = utils.forward_detection_with_text(lambda_fr, lambda_en, lambda_it, words_text_1)
text_2_result = utils.forward_detection_with_text(lambda_fr, lambda_en, lambda_it, words_text_2)
text_3_result = utils.forward_detection_with_text(lambda_fr, lambda_en, lambda_it, words_text_3)
print(f'texte 1 {text_1_result}, texte 2 {text_2_result}, texte 3 {text_3_result}')

View File

@@ -9,18 +9,18 @@ def normalize_probabilities(prob_fr: float, prob_en: float, prob_it: float, sear
return searched / sum
def forward_detection(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: str):
def forward_detection(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: list[int]) -> str:
"""
Prints the language detected with forward method
:param hmm_fr: lambda_fr
:param hmm_en: lambda_en
:param hmm_it: lambda_it
:param O: mot à détecter
:return:
:return: Le langage détecté
"""
res_fr = hmm_fr.forward(O)
res_en = hmm_en.forward(O)
res_it = hmm_it.forward(O)
res_fr, _ = hmm_fr.forward(O)
res_en, _ = hmm_en.forward(O)
res_it, _ = hmm_it.forward(O)
proba_fr = normalize_probabilities(res_fr, res_en, res_it, res_fr)
proba_en = normalize_probabilities(res_fr, res_en, res_it, res_en)
@@ -28,21 +28,21 @@ def forward_detection(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: str):
print(f'FR={proba_fr}, EN={proba_en}, IT={proba_it}')
probas = [proba_fr, proba_en, proba_it]
probas: list[float] = [proba_fr, proba_en, proba_it]
max_prob = proba_fr
language_index = 0
for index in 1..len(probas):
for index in range(1, len(probas)):
if max_prob < probas[index]:
max_prob = probas[index]
language_index = index
if language_index == 0:
print('Français')
return 'Français'
elif language_index == 1:
print('Anglais')
return 'Anglais'
else:
print('Italien')
return 'Italien'
def backward_detection(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: str):
@@ -68,17 +68,42 @@ def backward_detection(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: str):
max_prob = proba_fr
language_index = 0
for index in 1..len(probas):
for index in range(1,len(probas)):
if max_prob < probas[index]:
max_prob = probas[index]
language_index = index
if language_index == 0:
print('Français')
return 'Français'
elif language_index == 1:
print('Anglais')
return 'Anglais'
else:
print('Italien')
return 'Italien'
def forward_detection_with_text(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: list[list[int]]) -> dict:
    """
    Tally, word by word, which language forward detection picks for a text.

    Each word of the text is passed to ``forward_detection`` and the label it
    returns is counted. This function only returns the counts; choosing the
    language with the highest count is left to the caller.

    :param hmm_fr: lambda fr (French model)
    :param hmm_en: lambda en (English model)
    :param hmm_it: lambda it (Italian model)
    :param O: the text as a list of words, each word being a list of
        alphabet indices
    :return: the number of words attributed to each language, under the keys
        'french', 'english' and 'italian'
    """
    # Maps the label returned by forward_detection to its key in the tally.
    label_to_key = {'Français': 'french', 'Anglais': 'english', 'Italien': 'italian'}
    tally = {'french': 0, 'english': 0, 'italian': 0}
    for word in O:
        key = label_to_key.get(forward_detection(hmm_fr, hmm_en, hmm_it, word))
        # A label outside the three known ones is left uncounted.
        if key is not None:
            tally[key] += 1
    return tally