First commit

2025-09-19 18:20:43 +02:00
commit 3d8dab7f06
6 changed files with 226 additions and 0 deletions
--- a/.gitea/workflows/sonar.yml
+++ b/.gitea/workflows/sonar.yml
@@ -0,0 +1,32 @@
+name: SonarQube Scan
+
+on:
+  push:
+    branches:
+      - '**'  # glob pattern; intended to match every branch name
+  pull_request:
+    branches:
+      - '**'  # same pattern, applied to pull-request target branches
+
+jobs:
+  sonarqube:
+    name: SonarQube Trigger
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # 0 = fetch the full git history instead of a shallow clone
+
+      - name: Download SonarQube Scanner
+        run: |  # NOTE(review): the downloaded archive is not checksum-verified before use
+          curl -sSLo sonar-scanner.zip https://binaries.sonarsource.com/Distribution/sonar-scanner-cli/sonar-scanner-cli-5.0.1.3006-linux.zip
+          unzip sonar-scanner.zip
+
+      - name: Run SonarQube Scan
+        run: |  # NOTE(review): sonar.login is reportedly deprecated in favour of sonar.token on recent SonarQube -- confirm server version
+          ./sonar-scanner-*/bin/sonar-scanner \
+            -Dsonar.projectKey=tp1-iaavancee \
+            -Dsonar.sources=. \
+            -Dsonar.host.url=${{ secrets.SONARQUBE_HOST }} \
+            -Dsonar.login=${{ secrets.SONARQUBE_TOKEN }}
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,5 @@
+# Pycharm
+.idea/
+
+# virtual envs: matches both `.venv/` and the `venv/` directory the README creates
+*venv/
--- a/README.md
+++ b/README.md
@@ -0,0 +1,34 @@
+# TP1 IA Avancée
+
+Le but de ce TP est de faire se déplacer une IA dans un entrepôt en lui donnant un point de départ et un point d'arrivée.
+
+## Install
+
+Tout d'abord, créez un venv nommé `venv` :
+
+```bash
+python -m venv venv
+```
+
+Puis pour activer le venv :
+
+- Windows :
+```powershell
+venv\Scripts\activate
+```
+- Linux :
+```bash
+source venv/bin/activate
+```
+- macOS :
+même commande que sous Linux : `source venv/bin/activate`
+
+Pour installer les dépendances dans le venv activé, utilisez cette commande :
+
+```bash
+pip install -r requirements.txt
+```
+
+## Authors
+
+Thomas SAZERAT
--- a/ex1.py
+++ b/ex1.py
@@ -0,0 +1,87 @@
+import numpy as np
+import pandas as pd
+from typing import List
+
+
+def best_path(start_label: str, goal_label: str) -> List[str]:
+    s = labels.index(start_label)
+    g = labels.index(goal_label)
+    path = [start_label]
+    while s!= g:
+        a = np.argmax(Q[s]) # ici, on récupère l'action
+        next = NEXT[s, a]
+        if next is None:
+            raise f'Action impossible State{s} Action{a} NextState{next}'
+        s = labels.index(next)
+        path.append(next)
+    return path
+
+
+labels = list("ABCDEFGHIJKL")
+
+R = np.array([
+    # UP DOWN LEFT RIGHT
+    [0,0,0,1], #A
+    [0,1,1,1], #B
+    [0,1,1,0], #C
+    [0,1,0,0], #D
+    [0,1,0,0], #E
+    [1,1,0,0], #F
+    [1,0,0,1], #G
+    [1,1,1,0], #H
+    [1,0,0,1], #I
+    [1,0,1,1], #J
+    [0,0,1,1], #K
+    [1,0,1,0], #L
+], dtype=float)
+
+# Fait une matrice de même dimension que R remplie de 0
+Q = np.zeros_like(R)
+
+# on a l'état courant et l'action en cours, il nous faut st+1 (la prochaine action)
+NEXT = np.array([
+    # UP DOWN LEFT RIGHT
+    [None, None, None, 'B'], #A
+    [None, 'F', 'A', 'C'], #B
+    [None, 'G', 'B', None], #C
+    [None, 'H', None, None], #D
+    [None, 'I', None, None], #E
+    ['B', 'J', None, None], #F
+    ['C', None, None, 'H'], #G
+    ['D', 'L', 'G', None], #H
+    ['E', None, None, 'J'], #I
+    ['F', None, 'I', 'K'], #J
+    [None, None, 'J', 'L'], #K
+    ['H', None, 'K', None], #L
+])
+
+# Hyperparameters
+gamma = 0.75
+alpha = 0.90
+n_iters = 1_000
+
+rng = np.random.default_rng(0)
+
+# augmente le reward pour les directions qui mènent à G (C DOWN & H LEFT)
+goal_opt1 = labels.index('C')
+down_index = 1
+goal_opt2 = labels.index('H')
+left_index = 2
+R_goal = R.copy()
+R_goal[goal_opt1, down_index] = 1_000.0
+R_goal[goal_opt2, left_index] = 1_000.0
+
+for _ in range(n_iters):
+    s = rng.integers(0, R.shape[0]) # random current state
+    actions = np.where(R_goal[s] > 0)[0] # valid actions
+    if actions.size == 0:
+        continue
+    a = rng.choice(actions) # random valid action
+    s_next = a # transition to next state
+    TD = R_goal[s, a] + gamma * Q[s_next].max() - Q[s, a]
+    Q[s, a] += alpha * TD
+
+print("Matrice Q: ")
+print(Q)
+
+print("Path E -> G: ", " -> ".join(best_path('E', 'G')))
--- a/requirements.txt
+++ b/requirements.txt
--- a/sample.py
+++ b/sample.py
@@ -0,0 +1,68 @@
+import numpy as np
+import pandas as pd
+
+labels = list("ABCDEFGHIJKL")
+
+R = np.array([
+    [0,1,0,0,0,0,0,0,0,0,0,0], #A
+    [1,0,1,0,0,1,0,0,0,0,0,0], #B
+    [0,1,0,0,0,0,1,0,0,0,0,0], #C
+    [0,0,0,0,0,0,0,1,0,0,0,0], #D
+    [0,0,0,0,0,0,0,0,1,0,0,0], #E
+    [0,1,0,0,0,0,0,0,0,1,0,0], #F
+    [0,0,1,0,0,0,0,1,0,0,0,0], #G
+    [0,0,0,1,0,0,1,0,0,0,0,1], #H
+    [0,0,0,0,1,0,0,0,0,1,0,0], #I
+    [0,0,0,0,0,1,0,0,1,0,1,0], #J
+    [0,0,0,0,0,0,0,0,0,1,0,1], #K
+    [0,0,0,0,0,0,0,1,0,0,1,0], #L
+], dtype=float)
+
+# Q-table: same shape and dtype as R, initialised to zeros
+Q = np.zeros_like(R)
+
+print(type(R)) # type of the object (a NumPy array)
+print(R.ndim) # 2 -> 2-D matrix
+print(R.shape) # (12, 12) -> 12 rows, 12 columns
+print(R.dtype) # float64
+print(R.size) # 144 elements
+print(R.strides) # e.g. (96, 8) for a C-contiguous 12-column float64 array
+
+# Hyperparameters
+
+gamma = 0.75
+alpha = 0.90
+n_iters = 1000
+
+rng = np.random.default_rng(0)
+
+# Train Q-Learning for goal 'G'
+goal_label = 'G'
+goal = labels.index(goal_label)
+R_goal = R.copy()
+R_goal[goal, goal] = 1000.0
+
+for _ in range(n_iters):
+    s = rng.integers(0, R.shape[0]) # random current state
+    actions = np.where(R_goal[s] > 0)[0] # valid actions
+    if actions.size == 0:
+        continue
+    a = rng.choice(actions) # random valid action
+    s_next = a # columns of R index states, so the chosen column is the next state
+    # Temporal-difference (TD) error
+    TD = R_goal[s, a] + gamma * Q[s_next].max() - Q[s, a]
+    # Q-learning update: move Q[s, a] by alpha times the TD error
+    Q[s, a] += alpha * TD
+
+
+def best_path(start_label: str, goal_label: str):
+    s = labels.index(start_label)
+    g = labels.index(goal_label)
+    path = [start_label]
+    while s!= g:
+        s = np.argmax(Q[s]) # ici, on récupère l'action
+        path.append(labels[s])
+    return path
+
+
+print("Path E -> G: ", " -> ".join(best_path('E', 'G')))  # NOTE(review): best_path has no cycle guard; it never returns if the greedy policy misses G