66 lines
3.4 KiB
Python
66 lines
3.4 KiB
Python
"""
|
|
Note: I code in English but comment in French!
|
|
"""
|
|
import data_preparation
|
|
from HMM import HMM
|
|
import utils
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: builds one HMM per language (French, English,
    # Italian), scores a single word with forward() and backward(), then runs
    # utils.forward_detection_with_text on three text files, first with models
    # built from an emission-matrix file and then with models built without one.
    # Every list below keeps the fixed order FR, EN, IT.

    # Prepared training data, one entry per language.
    corpora = [
        data_preparation.prepare_data(corpus_file)
        for corpus_file in ('french.txt', 'english.txt', 'italian.txt')
    ]

    # One model per language, all built from the same emission-matrix file.
    models = [HMM('matrice_emission.xls', corpus) for corpus in corpora]

    # Encode the word 'probablement' and keep the first entry of the result.
    encoded_words = data_preparation.get_text_in_alphabet_index_form('probablement')
    sample_word = encoded_words[0]

    # forward() returns a pair; only its first element is used here.
    forward_scores = []
    for model in models:
        score, _ = model.forward(sample_word)
        forward_scores.append(score)

    # Each score is normalized against the three scores (FR, EN, IT).
    share_fr, share_en, share_it = [
        utils.normalize_probabilities(*forward_scores, score)
        for score in forward_scores
    ]

    print('Résultats forward ---------------------------------------------------')
    print(f'FR={share_fr}, EN={share_en}, IT={share_it}, Conclusion={max(share_fr, share_en, share_it)}')

    # Same procedure with backward(); again only the first element is kept.
    backward_scores = []
    for model in models:
        score, _ = model.backward(sample_word)
        backward_scores.append(score)

    back_fr, back_en, back_it = [
        utils.normalize_probabilities(*backward_scores, score)
        for score in backward_scores
    ]

    print('Résultat backward ---------------------------------------------------')
    print(f'FR={back_fr}, EN={back_en}, IT={back_it}, Conclusion={max(back_fr, back_en, back_it)}')

    # Prepared content of the three texts to classify.
    texts = [
        data_preparation.prepare_data(text_file)
        for text_file in ('texte_1.txt', 'texte_2.txt', 'texte_3.txt')
    ]

    # Detection on each text using the three models built above.
    verdict_1, verdict_2, verdict_3 = [
        utils.forward_detection_with_text(*models, text_words)
        for text_words in texts
    ]

    print('Résultat sur les textes ----------------------------------------------')
    print(f'texte 1 {verdict_1}, texte 2 {verdict_2}, texte 3 {verdict_3}')

    # Rebuild the three models without an emission-matrix file
    # (emission_matrix_file_name=None); the banner printed below labels this
    # run as the identity-matrix case.
    identity_models = [
        HMM(numeric_text=corpus, emission_matrix_file_name=None)
        for corpus in corpora
    ]

    # Same detection on the same three texts with the rebuilt models.
    verdict_1, verdict_2, verdict_3 = [
        utils.forward_detection_with_text(*identity_models, text_words)
        for text_words in texts
    ]

    print('Résultat avec une matrice identité -----------------------------------')
    print(f'texte 1 {verdict_1}, texte 2 {verdict_2}, texte 3 {verdict_3}')