fix: correct the text language detection
This commit is contained in:
1
main.py
1
main.py
@@ -50,4 +50,5 @@ if __name__ == '__main__':
|
|||||||
text_2_result = utils.forward_detection_with_text(lambda_fr, lambda_en, lambda_it, words_text_2)
|
text_2_result = utils.forward_detection_with_text(lambda_fr, lambda_en, lambda_it, words_text_2)
|
||||||
text_3_result = utils.forward_detection_with_text(lambda_fr, lambda_en, lambda_it, words_text_3)
|
text_3_result = utils.forward_detection_with_text(lambda_fr, lambda_en, lambda_it, words_text_3)
|
||||||
|
|
||||||
|
print('Résultat sur les textes ----------------------------------------------')
|
||||||
print(f'texte 1 {text_1_result}, texte 2 {text_2_result}, texte 3 {text_3_result}')
|
print(f'texte 1 {text_1_result}, texte 2 {text_2_result}, texte 3 {text_3_result}')
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
"""
|
"""
|
||||||
Ce module contient des fonctions utilisataires
|
Ce module contient des fonctions utilitaires
|
||||||
"""
|
"""
|
||||||
from HMM import HMM
|
from HMM import HMM
|
||||||
|
|
||||||
@@ -9,14 +9,14 @@ def normalize_probabilities(prob_fr: float, prob_en: float, prob_it: float, sear
|
|||||||
return searched / sum
|
return searched / sum
|
||||||
|
|
||||||
|
|
||||||
def forward_detection(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: list[int]) -> str:
|
def forward_detection(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: list[int]) -> tuple[str, float, list[float]]:
|
||||||
"""
|
"""
|
||||||
Prints the language detected with forward method
|
Returns the language detected with the forward method
|
||||||
:param hmm_fr: lambda_fr
|
:param hmm_fr: lambda_fr
|
||||||
:param hmm_en: lambda_en
|
:param hmm_en: lambda_en
|
||||||
:param hmm_it: lambda_it
|
:param hmm_it: lambda_it
|
||||||
:param O: mot à détecter
|
:param O: mot à détecter
|
||||||
:return: Le langage détecté
|
:return: Le langage détecté et la probabilité
|
||||||
"""
|
"""
|
||||||
res_fr, _ = hmm_fr.forward(O)
|
res_fr, _ = hmm_fr.forward(O)
|
||||||
res_en, _ = hmm_en.forward(O)
|
res_en, _ = hmm_en.forward(O)
|
||||||
@@ -26,7 +26,7 @@ def forward_detection(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: list[int]) -> st
|
|||||||
proba_en = normalize_probabilities(res_fr, res_en, res_it, res_en)
|
proba_en = normalize_probabilities(res_fr, res_en, res_it, res_en)
|
||||||
proba_it = normalize_probabilities(res_fr, res_en, res_it, res_it)
|
proba_it = normalize_probabilities(res_fr, res_en, res_it, res_it)
|
||||||
|
|
||||||
print(f'FR={proba_fr}, EN={proba_en}, IT={proba_it}')
|
#print(f'FR={proba_fr}, EN={proba_en}, IT={proba_it}')
|
||||||
|
|
||||||
probas: list[float] = [proba_fr, proba_en, proba_it]
|
probas: list[float] = [proba_fr, proba_en, proba_it]
|
||||||
|
|
||||||
@@ -38,21 +38,21 @@ def forward_detection(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: list[int]) -> st
|
|||||||
language_index = index
|
language_index = index
|
||||||
|
|
||||||
if language_index == 0:
|
if language_index == 0:
|
||||||
return 'Français'
|
return 'Français', max_prob, [proba_fr, proba_en, proba_it]
|
||||||
elif language_index == 1:
|
elif language_index == 1:
|
||||||
return 'Anglais'
|
return 'Anglais', max_prob, [proba_fr, proba_en, proba_it]
|
||||||
else:
|
else:
|
||||||
return 'Italien'
|
return 'Italien', max_prob, [proba_fr, proba_en, proba_it]
|
||||||
|
|
||||||
|
|
||||||
def backward_detection(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: str):
|
def backward_detection(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: str) -> tuple[str, float]:
|
||||||
"""
|
"""
|
||||||
Prints the language detected with backward method
|
Prints the language detected with backward method
|
||||||
:param hmm_fr: lambda_fr
|
:param hmm_fr: lambda_fr
|
||||||
:param hmm_en: lambda_en
|
:param hmm_en: lambda_en
|
||||||
:param hmm_it: lambda_it
|
:param hmm_it: lambda_it
|
||||||
:param O: mot à détecter
|
:param O: mot à détecter
|
||||||
:return:
|
:return: Langage détecté et probabilité
|
||||||
"""
|
"""
|
||||||
res_fr = hmm_fr.backward(O)
|
res_fr = hmm_fr.backward(O)
|
||||||
res_en = hmm_en.backward(O)
|
res_en = hmm_en.backward(O)
|
||||||
@@ -74,11 +74,11 @@ def backward_detection(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: str):
|
|||||||
language_index = index
|
language_index = index
|
||||||
|
|
||||||
if language_index == 0:
|
if language_index == 0:
|
||||||
return 'Français'
|
return 'Français', max_prob
|
||||||
elif language_index == 1:
|
elif language_index == 1:
|
||||||
return 'Anglais'
|
return 'Anglais', max_prob
|
||||||
else:
|
else:
|
||||||
return 'Italien'
|
return 'Italien', max_prob
|
||||||
|
|
||||||
|
|
||||||
def forward_detection_with_text(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: list[list[int]]) -> dict:
|
def forward_detection_with_text(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: list[list[int]]) -> dict:
|
||||||
@@ -91,19 +91,19 @@ def forward_detection_with_text(hmm_fr: HMM, hmm_en: HMM, hmm_it: HMM, O: list[l
|
|||||||
:return: Le langage détecté
|
:return: Le langage détecté
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Count the language détection occurrences. The max count is the answer
|
# Count the language detection occurrences
|
||||||
french_count = 0
|
french_prob_count = english_prob_count = italian_prob_count = 0
|
||||||
english_count = 0
|
|
||||||
italian_count = 0
|
|
||||||
|
|
||||||
for word in O:
|
for word in O:
|
||||||
lang = forward_detection(hmm_fr, hmm_en, hmm_it, word)
|
lang, _, _ = forward_detection(hmm_fr, hmm_en, hmm_it, word)  # returns (language, max_prob, probas); only the label is used here
|
||||||
match lang:
|
match lang:
|
||||||
case 'Français':
|
case 'Français':
|
||||||
french_count += 1
|
french_prob_count += 1
|
||||||
case 'Anglais':
|
case 'Anglais':
|
||||||
english_count += 1
|
english_prob_count += 1
|
||||||
case 'Italien':
|
case 'Italien':
|
||||||
italian_count += 1
|
italian_prob_count += 1
|
||||||
|
|
||||||
return {'french': french_count, 'english': english_count, 'italian': italian_count}
|
total_sum = french_prob_count + english_prob_count + italian_prob_count
|
||||||
|
|
||||||
|
return {'french': french_prob_count / total_sum, 'english': english_prob_count / total_sum, 'italian': italian_prob_count / total_sum}
|
||||||
|
|||||||
Reference in New Issue
Block a user