Files
tp1-iaavancee/ex1.py
Namu 7df0e5a200
All checks were successful
SonarQube Scan / SonarQube Trigger (push) Successful in 23s
Feat: Add working agent that finds two paths!
2025-09-21 23:03:59 +02:00

103 lines
2.9 KiB
Python

from typing import List
import numpy as np
def best_path(start_label: str, goal_label: str, Q: np.array) -> List[str]:
s = labels.index(start_label)
g = labels.index(goal_label)
path = [start_label]
while s!= g:
a = np.argmax(Q[s]) # ici, on récupère l'action la plus optimale
next_state = NEXT_MOVE_TABLE[s, a] # on récupère le prochain state
s = labels.index(next_state) # on récupère l'index du state dans la matrice Q
path.append(next_state) # On ajoute l'état futur dans le chemin
return path
# One label per state; its position in this list is the row index used in
# R, NEXT_MOVE_TABLE and the learned Q matrices. The UP/DOWN/LEFT/RIGHT
# transitions suggest the states form a 3x4 grid (A-D / E-H / I-L) —
# inferred from the tables below, not stated in the file.
labels = list('ABCDEFGHIJKL')
# Base reward matrix R[state, action]: 1.0 when the move is allowed from
# that state, 0.0 when it is blocked.
R = np.array([
# UP DOWN LEFT RIGHT
[0,0,0,1], #A
[0,1,1,1], #B
[0,1,1,0], #C
[0,1,0,0], #D
[0,1,0,0], #E
[1,1,0,0], #F
[1,0,0,1], #G
[1,1,1,0], #H
[1,0,0,1], #I
[1,0,1,1], #J
[0,0,1,1], #K
[1,0,1,0], #L
], dtype=float)
# Transition table: given the current state (row) and the chosen action
# (column), NEXT_MOVE_TABLE[s, a] is the label of the next state s_{t+1},
# or None when that move is not possible from that state.
NEXT_MOVE_TABLE = np.array([
# UP DOWN LEFT RIGHT
[None, None, None, 'B'], #A
[None, 'F', 'A', 'C'], #B
[None, 'G', 'B', None], #C
[None, 'H', None, None], #D
[None, 'I', None, None], #E
['B', 'J', None, None], #F
['C', None, None, 'H'], #G
['D', 'L', 'G', None], #H
['E', None, None, 'J'], #I
['F', None, 'I', 'K'], #J
[None, None, 'J', 'L'], #K
['H', None, 'K', None], #L
])
# Hyperparameters
gamma = 0.75  # discount factor: weight of future rewards in the TD target
alpha = 0.90  # learning rate: step size of each Q-value update
n_iters = 1_000  # number of random transitions sampled during training
rng = np.random.default_rng(0)  # fixed seed so every run is reproducible
# Reward shaping: boost the reward of the moves that enter the goal state G
# (C DOWN and H LEFT) so the learned policy converges toward G.
goal_opt1 = labels.index('C')
down_index = 1  # column index of the DOWN action
goal_opt2 = labels.index('H')
left_index = 2  # column index of the LEFT action
R_goal_e_g = R.copy()
R_goal_e_g[goal_opt1, down_index] = 1_000.0
R_goal_e_g[goal_opt2, left_index] = 1_000.0
# Same idea for a second experiment: reward the move that enters A
# (B LEFT) so a separate Q-table learns paths toward A.
goal_to_a = labels.index('B')
R_goal_k_a = R.copy()
R_goal_k_a[goal_to_a, left_index] = 1_000.0
def generate_q_values(n_iters: int, R_goal: np.array) -> np.array:
# Fait une matrice de même dimension que R remplie de 0
Q = np.zeros_like(R_goal)
for _ in range(n_iters):
s = rng.integers(0, R.shape[0]) # random current state
actions = np.where(R_goal[s] > 0)[0] # valid actions
if actions.size == 0:
continue
a = rng.choice(actions) # random valid action
s_next_label: str | None = NEXT_MOVE_TABLE[s, a] # transition to next state
if s_next_label is None:
continue
s_next = labels.index(s_next_label)
TD = R_goal[s, a] + gamma * Q[s_next].max() - Q[s, a]
Q[s, a] += alpha * TD
return Q
# Train one Q-table per shaped reward matrix (E->G and K->A experiments),
# then print the learned tables and the greedy paths they encode.
Q_e_g = generate_q_values(n_iters, R_goal_e_g)
Q_k_a = generate_q_values(n_iters, R_goal_k_a)
print("Matrice Q E -> G: ")
print(Q_e_g)
print("Matrice Q K -> A")
print(Q_k_a)
print("Path E -> G: ", " -> ".join(best_path('E', 'G', Q_e_g)))
print("Path K -> A: ", " -> ".join(best_path('K', 'A', Q_k_a)))