110 lines
3.1 KiB
Python
110 lines
3.1 KiB
Python
"""
|
|
Utility functions for HMM-based language detection (French, English, Italian).
|
|
"""
|
|
from HMM import HMM
|
|
|
|
|
|
def normalize_probabilities(prob_fr: float, prob_en: float, prob_it: float, searched: float) -> float:
    """
    Return ``searched`` as a fraction of the sum of the three language scores.

    :param prob_fr: score for French
    :param prob_en: score for English
    :param prob_it: score for Italian
    :param searched: the score to normalize
    :return: ``searched / (prob_fr + prob_en + prob_it)``, or ``0.0`` when the
        three scores sum to zero
    """
    # Named `total` so the builtin `sum` is not shadowed.
    total = prob_fr + prob_en + prob_it
    if total == 0:
        # Nothing to normalize against; avoid a ZeroDivisionError.
        return 0.0
    return searched / total
|
|
|
|
|
|
def forward_detection(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: list[int]) -> str:
    """
    Detect the language of a word with the forward method.

    Calls ``forward(O)`` on each of the three models, normalizes the three
    scores with ``normalize_probabilities``, prints them, and returns the
    label of the highest one.

    :param hmm_fr: lambda_fr
    :param hmm_en: lambda_en
    :param hmm_it: lambda_it
    :param O: the word to detect
    :return: 'Français', 'Anglais' or 'Italien'; on a tie the earliest of
        these (in that order) is returned
    """
    # forward() returns a pair; only its first element is used here.
    res_fr, _ = hmm_fr.forward(O)
    res_en, _ = hmm_en.forward(O)
    res_it, _ = hmm_it.forward(O)

    proba_fr = normalize_probabilities(res_fr, res_en, res_it, res_fr)
    proba_en = normalize_probabilities(res_fr, res_en, res_it, res_en)
    proba_it = normalize_probabilities(res_fr, res_en, res_it, res_it)

    print(f'FR={proba_fr}, EN={proba_en}, IT={proba_it}')

    # Labels are listed in the same order as the probabilities below.
    labels = ('Français', 'Anglais', 'Italien')
    probas: list[float] = [proba_fr, proba_en, proba_it]

    # max() keeps the first maximal item, so ties resolve to the earliest index.
    best_index = max(range(len(probas)), key=probas.__getitem__)
    return labels[best_index]
|
|
|
|
|
|
def backward_detection(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: str) -> str:
    """
    Detect the language of a word with the backward method.

    Calls ``backward(O)`` on each of the three models, normalizes the three
    scores with ``normalize_probabilities``, prints them, and returns the
    label of the highest one.

    :param hmm_fr: lambda_fr
    :param hmm_en: lambda_en
    :param hmm_it: lambda_it
    :param O: the word to detect
    :return: 'Français', 'Anglais' or 'Italien'; on a tie the earliest of
        these (in that order) is returned
    """
    # NOTE(review): forward_detection annotates O as list[int] while this
    # function says str -- confirm which one HMM.backward actually expects.
    res_fr = hmm_fr.backward(O)
    res_en = hmm_en.backward(O)
    res_it = hmm_it.backward(O)

    proba_fr = normalize_probabilities(res_fr, res_en, res_it, res_fr)
    proba_en = normalize_probabilities(res_fr, res_en, res_it, res_en)
    proba_it = normalize_probabilities(res_fr, res_en, res_it, res_it)

    print(f'FR={proba_fr}, EN={proba_en}, IT={proba_it}')

    # Labels are listed in the same order as the probabilities below.
    labels = ('Français', 'Anglais', 'Italien')
    probas = [proba_fr, proba_en, proba_it]

    # max() keeps the first maximal item, so ties resolve to the earliest index.
    best_index = max(range(len(probas)), key=probas.__getitem__)
    return labels[best_index]
|
|
|
|
|
|
def forward_detection_with_text(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: list[list[int]]) -> dict[str, int]:
    """
    Count, word by word, which language the forward method detects in a text.

    Each word of ``O`` is passed to ``forward_detection`` and the returned
    label increments the matching counter. The function returns the counters
    themselves; picking the language with the highest count is left to the
    caller.

    :param hmm_fr: lambda fr
    :param hmm_en: lambda en
    :param hmm_it: lambda it
    :param O: the text, as a list of words given as alphabet indices
    :return: a dict with keys 'french', 'english' and 'italian' mapped to the
        number of words detected for each language
    """
    # Maps the label returned by forward_detection to the key used in the result.
    label_to_key = {
        'Français': 'french',
        'Anglais': 'english',
        'Italien': 'italian',
    }
    counts = dict.fromkeys(label_to_key.values(), 0)

    for word in O:
        key = label_to_key.get(forward_detection(hmm_fr, hmm_en, hmm_it, word))
        # An unrecognized label is ignored rather than counted.
        if key is not None:
            counts[key] += 1

    return counts
|