Feat: Add working agent that finds two paths!
All checks were successful
SonarQube Scan / SonarQube Trigger (push) Successful in 23s
ex1.py (75 changed lines)
@@ -1,23 +1,21 @@
-import numpy as np
 import pandas as pd
 from typing import List
 
+import numpy as np
 
-def best_path(start_label: str, goal_label: str) -> List[str]:
+def best_path(start_label: str, goal_label: str, Q: np.array) -> List[str]:
     s = labels.index(start_label)
     g = labels.index(goal_label)
     path = [start_label]
     while s != g:
-        a = np.argmax(Q[s])  # here we grab the action
-        next = NEXT[s, a]
-        if next is None:
-            raise f'Action impossible State{s} Action{a} NextState{next}'
-        s = labels.index(next)
-        path.append(next)
+        a = np.argmax(Q[s])  # here we grab the best action
+        next_state = NEXT_MOVE_TABLE[s, a]  # grab the next state
+        s = labels.index(next_state)  # grab the state's index in the Q matrix
+        path.append(next_state)  # append the future state to the path
     return path
 
 
-labels = list("ABCDEFGHIJKL")
+labels = list('ABCDEFGHIJKL')
 
 R = np.array([
     # UP DOWN LEFT RIGHT
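The reworked best_path is plain greedy policy extraction: starting from start_label, it repeatedly takes the action with the highest learned Q-value and follows the transition table until the goal is reached. In math form, the walk applies

    \pi(s) = \operatorname*{arg\,max}_{a} Q(s, a), \qquad s_{t+1} = \text{NEXT\_MOVE\_TABLE}[s_t, \pi(s_t)],

so it only terminates when the learned Q actually encodes a path from start to goal.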
@@ -35,11 +33,8 @@ R = np.array([
     [1,0,1,0], #L
 ], dtype=float)
 
-# build a matrix with the same shape as R, filled with 0
-Q = np.zeros_like(R)
-
-# we have the current state and the current action; we need st+1 (the next action)
-NEXT = np.array([
+# we have the current state and the current action; we need st+1 (the next state)
+NEXT_MOVE_TABLE = np.array([
     # UP DOWN LEFT RIGHT
     [None, None, None, 'B'], #A
     [None, 'F', 'A', 'C'], #B
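R and NEXT_MOVE_TABLE now describe the same maze twice: R[s, a] > 0 marks a legal move, and NEXT_MOVE_TABLE[s, a] names the state it leads to (None marks a wall). Because the training loop below still guards against None destinations, a self-check that the two tables agree could catch copy mistakes early; a minimal sketch, assuming labels, R, and NEXT_MOVE_TABLE are defined as in this file:

ACTIONS = ['UP', 'DOWN', 'LEFT', 'RIGHT']

# sanity check: a move is rewarded in R exactly when it has a destination
for s, label in enumerate(labels):
    for a, action in enumerate(ACTIONS):
        legal = R[s, a] > 0
        has_destination = NEXT_MOVE_TABLE[s, a] is not None
        assert legal == has_destination, f'{label} {action}: tables disagree'

If the assertion ever fires, either a reward is missing in R or a transition is missing in NEXT_MOVE_TABLE.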
@@ -67,21 +62,41 @@ goal_opt1 = labels.index('C')
 down_index = 1
 goal_opt2 = labels.index('H')
 left_index = 2
-R_goal = R.copy()
-R_goal[goal_opt1, down_index] = 1_000.0
-R_goal[goal_opt2, left_index] = 1_000.0
+R_goal_e_g = R.copy()
+R_goal_e_g[goal_opt1, down_index] = 1_000.0
+R_goal_e_g[goal_opt2, left_index] = 1_000.0
 
-for _ in range(n_iters):
-    s = rng.integers(0, R.shape[0])  # random current state
-    actions = np.where(R_goal[s] > 0)[0]  # valid actions
-    if actions.size == 0:
-        continue
-    a = rng.choice(actions)  # random valid action
-    s_next = a  # transition to next state
-    TD = R_goal[s, a] + gamma * Q[s_next].max() - Q[s, a]
-    Q[s, a] += alpha * TD
-
-print("Matrice Q: ")
-print(Q)
-
-print("Path E -> G: ", " -> ".join(best_path('E', 'G')))
+# same thing for the path that leads to A, for another test
+goal_to_a = labels.index('B')
+R_goal_k_a = R.copy()
+R_goal_k_a[goal_to_a, left_index] = 1_000.0
+
+
+def generate_q_values(n_iters: int, R_goal: np.array) -> np.array:
+    # build a matrix with the same shape as R_goal, filled with 0
+    Q = np.zeros_like(R_goal)
+    for _ in range(n_iters):
+        s = rng.integers(0, R.shape[0])  # random current state
+        actions = np.where(R_goal[s] > 0)[0]  # valid actions
+        if actions.size == 0:
+            continue
+        a = rng.choice(actions)  # random valid action
+        s_next_label: str | None = NEXT_MOVE_TABLE[s, a]  # transition to next state
+        if s_next_label is None:
+            continue
+        s_next = labels.index(s_next_label)
+        TD = R_goal[s, a] + gamma * Q[s_next].max() - Q[s, a]
+        Q[s, a] += alpha * TD
+    return Q
+
+
+Q_e_g = generate_q_values(n_iters, R_goal_e_g)
+Q_k_a = generate_q_values(n_iters, R_goal_k_a)
+
+print("Matrice Q E -> G: ")
+print(Q_e_g)
+
+print("Matrice Q K -> A")
+print(Q_k_a)
+
+print("Path E -> G: ", " -> ".join(best_path('E', 'G', Q_e_g)))
+print("Path K -> A: ", " -> ".join(best_path('K', 'A', Q_k_a)))
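For reference, the TD lines in generate_q_values implement the standard Q-learning update

    Q(s, a) \leftarrow Q(s, a) + \alpha \bigl( R(s, a) + \gamma \max_{a'} Q(s', a') - Q(s, a) \bigr),

with the reward matrix R_goal shaped so that the transition into the goal state pays 1_000.0. The hunks also use rng, alpha, gamma, and n_iters, which are defined in a part of ex1.py this diff does not show; a minimal sketch of plausible definitions (every value below is an assumption, not what the commit sets):

import numpy as np

rng = np.random.default_rng(0)  # assumed seed; the file's actual seed is not shown
alpha = 0.1        # learning rate (assumed value)
gamma = 0.9        # discount factor (assumed value)
n_iters = 10_000   # number of training iterations (assumed value)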