.gitea/workflows/sonar.yml (new file, 32 lines)
@@ -0,0 +1,32 @@
name: SonarQube Scan

on:
  push:
    branches:
      - '**'
  pull_request:
    branches:
      - '**'

jobs:
  sonarqube:
    name: SonarQube Trigger
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Download SonarQube Scanner
        run: |
          curl -sSLo sonar-scanner.zip https://binaries.sonarsource.com/Distribution/sonar-scanner-cli/sonar-scanner-cli-5.0.1.3006-linux.zip
          unzip sonar-scanner.zip

      - name: Run SonarQube Scan
        run: |
          ./sonar-scanner-*/bin/sonar-scanner \
            -Dsonar.projectKey=tp1-iaavancee \
            -Dsonar.sources=. \
            -Dsonar.host.url=${{ secrets.SONARQUBE_HOST }} \
            -Dsonar.login=${{ secrets.SONARQUBE_TOKEN }}
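
# Assumes SONARQUBE_HOST and SONARQUBE_TOKEN are configured as repository
# secrets in the Gitea instance; neither is defined in this commit. Note that
# `sonar.login` is deprecated in newer SonarQube releases in favour of
# `sonar.token`, though scanner CLI 5.x still accepts the property used above.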
.gitignore (new file, vendored, 5 lines)
@@ -0,0 +1,5 @@
# Pycharm
.idea/

# virtual env
.venv/
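
# NB: the README below creates a virtualenv named `venv`, which this file does
# not ignore; add a `venv/` entry if you follow the README literally.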
README.md (new file, 34 lines)
@@ -0,0 +1,34 @@
# TP1 IA Avancée

The goal of this practical is to make an AI move through a warehouse, given a start point and a goal point.

## Install

First, create a venv named `venv`:

```bash
python -m venv venv
```

Then, to activate the venv:

- Windows:
```powershell
venv\Scripts\activate
```
- Linux:
```bash
source venv/bin/activate
```
- macOS: same as on Linux:
```bash
source venv/bin/activate
```

To install the libraries into the venv, use this command:

```bash
pip install -r requirements.txt
```

## Authors

[Thomas SAZERAT]()
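Both Python files in this commit apply the same tabular Q-learning rule. For reference, a minimal self-contained sketch of that update (the `q_update` helper and its toy 2-state table are illustrative, not part of the repository):

```python
import numpy as np

def q_update(Q: np.ndarray, s: int, a: int, r: float, s_next: int,
             alpha: float = 0.90, gamma: float = 0.75) -> None:
    # Temporal-difference / Bellman update:
    #   Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s_next, a') - Q(s, a))
    TD = r + gamma * Q[s_next].max() - Q[s, a]
    Q[s, a] += alpha * TD

# One update on a toy 2-state, 2-action table:
Q = np.zeros((2, 2))
q_update(Q, s=0, a=1, r=10.0, s_next=1)
print(Q)  # Q[0, 1] is now 9.0 = 0.90 * (10.0 + 0.75 * 0.0 - 0.0)
```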
ex1.py (new file, 87 lines)
@@ -0,0 +1,87 @@
import numpy as np
from typing import List


def best_path(start_label: str, goal_label: str) -> List[str]:
    s = labels.index(start_label)
    g = labels.index(goal_label)
    path = [start_label]
    while s != g:
        a = np.argmax(Q[s])  # greedy action for the current state
        next_label = NEXT[s, a]
        if next_label is None:
            raise ValueError(f"Impossible action: state {s}, action {a}")
        s = labels.index(next_label)
        path.append(next_label)
    return path


labels = list("ABCDEFGHIJKL")

R = np.array([
    # UP DOWN LEFT RIGHT
    [0, 0, 0, 1],  # A
    [0, 1, 1, 1],  # B
    [0, 1, 1, 0],  # C
    [0, 1, 0, 0],  # D
    [0, 1, 0, 0],  # E
    [1, 1, 0, 0],  # F
    [1, 0, 0, 1],  # G
    [1, 1, 1, 0],  # H
    [1, 0, 0, 1],  # I
    [1, 0, 1, 1],  # J
    [0, 0, 1, 1],  # K
    [1, 0, 1, 0],  # L
], dtype=float)
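
# (R[s, a] = 1 marks direction a as a valid move from state s; the columns are UP, DOWN, LEFT, RIGHT.)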

# Build a matrix with the same shape as R, filled with zeros
Q = np.zeros_like(R)

# Given the current state and the chosen action, NEXT gives s_{t+1} (the next state)
NEXT = np.array([
    # UP DOWN LEFT RIGHT
    [None, None, None, 'B'],  # A
    [None, 'F', 'A', 'C'],    # B
    [None, 'G', 'B', None],   # C
    [None, 'H', None, None],  # D
    [None, 'I', None, None],  # E
    ['B', 'J', None, None],   # F
    ['C', None, None, 'H'],   # G
    ['D', 'L', 'G', None],    # H
    ['E', None, None, 'J'],   # I
    ['F', None, 'I', 'K'],    # J
    [None, None, 'J', 'L'],   # K
    ['H', None, 'K', None],   # L
])

# Hyperparameters
gamma = 0.75  # discount factor
alpha = 0.90  # learning rate
n_iters = 1_000

rng = np.random.default_rng(0)

# Boost the reward for the moves that lead to G (C DOWN and H LEFT)
goal_opt1 = labels.index('C')
down_index = 1
goal_opt2 = labels.index('H')
left_index = 2
R_goal = R.copy()
R_goal[goal_opt1, down_index] = 1_000.0
R_goal[goal_opt2, left_index] = 1_000.0

for _ in range(n_iters):
    s = rng.integers(0, R.shape[0])       # random current state
    actions = np.where(R_goal[s] > 0)[0]  # valid actions from s
    if actions.size == 0:
        continue
    a = rng.choice(actions)               # random valid action
    s_next = labels.index(NEXT[s, a])     # look up the next state for that move
    TD = R_goal[s, a] + gamma * Q[s_next].max() - Q[s, a]  # temporal difference
    Q[s, a] += alpha * TD                 # Bellman update
print("Matrice Q: ")
|
||||||
|
print(Q)
|
||||||
|
|
||||||
|
print("Path E -> G: ", " -> ".join(best_path('E', 'G')))
|
||||||
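A possible sanity check, not in the original file: print the greedy path from a couple of other start states. With the fixed seed above the table should have converged; an unconverged row would surface as the ValueError raised in best_path:

```python
# Hypothetical addition at the end of ex1.py (uses Q, NEXT and best_path from above).
for start in ('A', 'K'):
    print(f"Path {start} -> G: ", " -> ".join(best_path(start, 'G')))
```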
requirements.txt (new file, binary)
Binary file not shown.
sample.py (new file, 68 lines)
@@ -0,0 +1,68 @@
import numpy as np

labels = list("ABCDEFGHIJKL")

R = np.array([
    [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],  # A
    [1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0],  # B
    [0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],  # C
    [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],  # D
    [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0],  # E
    [0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],  # F
    [0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0],  # G
    [0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1],  # H
    [0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0],  # I
    [0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0],  # J
    [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1],  # K
    [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0],  # L
], dtype=float)
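
# (Here R[i, j] = 1 means a move from state i to state j is allowed, so choosing an action *is* choosing the next state.)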

# Build a matrix with the same shape as R, filled with zeros
Q = np.zeros_like(R)

print(type(R))    # <class 'numpy.ndarray'>
print(R.ndim)     # 2 -> 2-D matrix
print(R.shape)    # (12, 12) -> 12 rows, 12 columns
print(R.dtype)    # float64
print(R.size)     # 144 elements
print(R.strides)  # (96, 8): bytes to step one row / one column

# Hyperparameters
gamma = 0.75  # discount factor
alpha = 0.90  # learning rate
n_iters = 1000

rng = np.random.default_rng(0)

# Train Q-learning for goal 'G'
goal_label = 'G'
goal = labels.index(goal_label)
R_goal = R.copy()
R_goal[goal, goal] = 1000.0  # large reward for staying at the goal

for _ in range(n_iters):
    s = rng.integers(0, R.shape[0])       # random current state
    actions = np.where(R_goal[s] > 0)[0]  # valid actions
    if actions.size == 0:
        continue
    a = rng.choice(actions)               # random valid action
    s_next = a                            # here an action is the next state (R is 12x12)
    # Temporal difference
    TD = R_goal[s, a] + gamma * Q[s_next].max() - Q[s, a]
    # Bellman update
    Q[s, a] += alpha * TD


def best_path(start_label: str, goal_label: str):
    s = labels.index(start_label)
    g = labels.index(goal_label)
    path = [start_label]
    while s != g:
        s = np.argmax(Q[s])  # with a 12x12 Q, the best action is directly the next state
        path.append(labels[s])
    return path


print("Path E -> G: ", " -> ".join(best_path('E', 'G')))
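Note the modeling difference between the two scripts: sample.py works on a 12x12 state-to-state matrix, so the sampled action index is itself the next state (hence `s_next = a`), whereas ex1.py uses a 12x4 direction-based action space and has to resolve the successor through the NEXT lookup table (`s_next = labels.index(NEXT[s, a])`).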