Feat: Add working agent that finds two paths!

2025-09-21 23:03:59 +02:00
parent a0b3068108
commit 7df0e5a200
1 changed files with 45 additions and 30 deletions
--- a/ex1.py
+++ b/ex1.py
@@ -1,23 +1,21 @@
-import numpy as np
-import pandas as pd
 from typing import List

+import numpy as np

-def best_path(start_label: str, goal_label: str) -> List[str]:
+
+def best_path(start_label: str, goal_label: str, Q: np.array) -> List[str]:
    s = labels.index(start_label)
    g = labels.index(goal_label)
    path = [start_label]
    while s!= g:
-        a = np.argmax(Q[s]) # ici, on récupère l'action
-        next = NEXT[s, a]
-        if next is None:
-            raise f'Action impossible State{s} Action{a} NextState{next}'
-        s = labels.index(next)
-        path.append(next)
+        a = np.argmax(Q[s]) # ici, on récupère l'action la plus optimale
+        next_state = NEXT_MOVE_TABLE[s, a] # on récupère le prochain state
+        s = labels.index(next_state) # on récupère l'index du state dans la matrice Q
+        path.append(next_state) # On ajoute l'état futur dans le chemin
    return path


-labels = list("ABCDEFGHIJKL")
+labels = list('ABCDEFGHIJKL')

 R = np.array([
    # UP DOWN LEFT RIGHT
@@ -35,11 +33,8 @@ R = np.array([
    [1,0,1,0], #L
 ], dtype=float)

-# Fait une matrice de même dimension que R remplie de 0
-Q = np.zeros_like(R)
-
-# on a l'état courant et l'action en cours, il nous faut st+1 (la prochaine action)
-NEXT = np.array([
+# on a l'état courant et l'action en cours, il nous faut st+1 (le prochain state)
+NEXT_MOVE_TABLE = np.array([
    # UP DOWN LEFT RIGHT
    [None, None, None, 'B'], #A
    [None, 'F', 'A', 'C'], #B
@@ -67,21 +62,41 @@ goal_opt1 = labels.index('C')
 down_index = 1
 goal_opt2 = labels.index('H')
 left_index = 2
-R_goal = R.copy()
-R_goal[goal_opt1, down_index] = 1_000.0
-R_goal[goal_opt2, left_index] = 1_000.0
+R_goal_e_g = R.copy()
+R_goal_e_g[goal_opt1, down_index] = 1_000.0
+R_goal_e_g[goal_opt2, left_index] = 1_000.0

-for _ in range(n_iters):
-    s = rng.integers(0, R.shape[0]) # random current state
-    actions = np.where(R_goal[s] > 0)[0] # valid actions
-    if actions.size == 0:
-        continue
-    a = rng.choice(actions) # random valid action
-    s_next = a # transition to next state
-    TD = R_goal[s, a] + gamma * Q[s_next].max() - Q[s, a]
-    Q[s, a] += alpha * TD
+# même chose pour le chemin qui mène à A pour faire un autre test
+goal_to_a = labels.index('B')
+R_goal_k_a = R.copy()
+R_goal_k_a[goal_to_a, left_index] = 1_000.0

-print("Matrice Q: ")
-print(Q)

-print("Path E -> G: ", " -> ".join(best_path('E', 'G')))
+def generate_q_values(n_iters: int, R_goal: np.array) -> np.array:
+    # Fait une matrice de même dimension que R remplie de 0
+    Q = np.zeros_like(R_goal)
+    for _ in range(n_iters):
+        s = rng.integers(0, R.shape[0]) # random current state
+        actions = np.where(R_goal[s] > 0)[0] # valid actions
+        if actions.size == 0:
+            continue
+        a = rng.choice(actions) # random valid action
+        s_next_label: str | None = NEXT_MOVE_TABLE[s, a] # transition to next state
+        if s_next_label is None:
+            continue
+        s_next = labels.index(s_next_label)
+        TD = R_goal[s, a] + gamma * Q[s_next].max() - Q[s, a]
+        Q[s, a] += alpha * TD
+    return Q
+
+Q_e_g = generate_q_values(n_iters, R_goal_e_g)
+Q_k_a = generate_q_values(n_iters, R_goal_k_a)
+
+print("Matrice Q E -> G: ")
+print(Q_e_g)
+
+print("Matrice Q K -> A")
+print(Q_k_a)
+
+print("Path E -> G: ", " -> ".join(best_path('E', 'G', Q_e_g)))
+print("Path K -> A: ", " -> ".join(best_path('K', 'A', Q_k_a)))