From 3d8dab7f0602510486a30ae5658d3a946cf687ea Mon Sep 17 00:00:00 2001
From: Namu
Date: Fri, 19 Sep 2025 18:20:43 +0200
Subject: [PATCH] First commit

---
 .gitea/workflows/sonar.yml | 32 ++++++++++++++
 .gitignore                 |  5 +++
 README.md                  | 34 +++++++++++++++
 ex1.py                     | 87 +++++++++++++++++++++++++++++++++++++
 requirements.txt           | Bin 0 -> 206 bytes
 sample.py                  | 68 +++++++++++++++++++++++++++++
 6 files changed, 226 insertions(+)
 create mode 100644 .gitea/workflows/sonar.yml
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 ex1.py
 create mode 100644 requirements.txt
 create mode 100644 sample.py

diff --git a/.gitea/workflows/sonar.yml b/.gitea/workflows/sonar.yml
new file mode 100644
index 0000000..436073c
--- /dev/null
+++ b/.gitea/workflows/sonar.yml
@@ -0,0 +1,32 @@
+name: SonarQube Scan
+
+on:
+  push:
+    branches:
+      - '**'
+  pull_request:
+    branches:
+      - '**'
+
+jobs:
+  sonarqube:
+    name: SonarQube Trigger
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Download SonarQube Scanner
+        run: |
+          curl -sSLo sonar-scanner.zip https://binaries.sonarsource.com/Distribution/sonar-scanner-cli/sonar-scanner-cli-5.0.1.3006-linux.zip
+          unzip sonar-scanner.zip
+
+      - name: Run SonarQube Scan
+        run: |
+          ./sonar-scanner-*/bin/sonar-scanner \
+            -Dsonar.projectKey=tp1-iaavancee \
+            -Dsonar.sources=. \
+            -Dsonar.host.url=${{ secrets.SONARQUBE_HOST }} \
+            -Dsonar.login=${{ secrets.SONARQUBE_TOKEN }}
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d0e231e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+# PyCharm
+.idea/
+
+# virtual env
+.venv/
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..8cf693f
--- /dev/null
+++ b/README.md
@@ -0,0 +1,34 @@
+# TP1 IA Avancée
+
+The goal of this assignment is to make an AI navigate a warehouse, given a start point and a destination.
+
+## Install
+
+First, create a virtual environment named `venv`:
+
+```bash
+python -m venv venv
+```
+
+Then, to activate the venv:
+
+- Windows:
+```powershell
+venv\Scripts\activate
+```
+- Linux:
+```bash
+source venv/bin/activate
+```
+- MacOS:
+I don't know, GLHF
+
+To install the libraries into the venv, use this command:
+
+```bash
+pip install -r requirements.txt
+```
+
+## Authors
+
+[Thomas SAZERAT]()
diff --git a/ex1.py b/ex1.py
new file mode 100644
index 0000000..3157ad1
--- /dev/null
+++ b/ex1.py
@@ -0,0 +1,87 @@
+import numpy as np
+import pandas as pd
+from typing import List
+
+
+def best_path(start_label: str, goal_label: str) -> List[str]:
+    s = labels.index(start_label)
+    g = labels.index(goal_label)
+    path = [start_label]
+    while s != g:
+        a = np.argmax(Q[s])  # pick the greedy action for the current state
+        next_label = NEXT[s, a]
+        if next_label is None:
+            raise ValueError(f'Impossible action: state {labels[s]}, action {a}')
+        s = labels.index(next_label)
+        path.append(next_label)
+    return path
+
+
+labels = list("ABCDEFGHIJKL")
+
+R = np.array([
+    # UP DOWN LEFT RIGHT
+    [0,0,0,1], #A
+    [0,1,1,1], #B
+    [0,1,1,0], #C
+    [0,1,0,0], #D
+    [0,1,0,0], #E
+    [1,1,0,0], #F
+    [1,0,0,1], #G
+    [1,1,1,0], #H
+    [1,0,0,1], #I
+    [1,0,1,1], #J
+    [0,0,1,1], #K
+    [1,0,1,0], #L
+], dtype=float)
+
+# Matrix of zeros with the same shape as R
+Q = np.zeros_like(R)
+
+# Given the current state and the chosen action, we need s_{t+1} (the next state)
+NEXT = np.array([
+    # UP DOWN LEFT RIGHT
+    [None, None, None, 'B'], #A
+    [None, 'F', 'A', 'C'], #B
+    [None, 'G', 'B', None], #C
+    [None, 'H', None, None], #D
+    [None, 'I', None, None], #E
+    ['B', 'J', None, None], #F
+    ['C', None, None, 'H'], #G
+    ['D', 'L', 'G', None], #H
+    ['E', None, None, 'J'], #I
+    ['F', None, 'I', 'K'], #J
+    [None, None, 'J', 'L'], #K
+    ['H', None, 'K', None], #L
+])
+
+# Hyperparameters
+gamma = 0.75
+alpha = 0.90
+n_iters = 1_000
+
+rng = np.random.default_rng(0)
+
+# Increase the reward for the two actions that lead to G (C DOWN & H LEFT)
+goal_opt1 = labels.index('C')
+down_index = 1
+goal_opt2 = labels.index('H')
+left_index = 2
+R_goal = R.copy()
+R_goal[goal_opt1, down_index] = 1_000.0
+R_goal[goal_opt2, left_index] = 1_000.0
+
+for _ in range(n_iters):
+    s = rng.integers(0, R.shape[0])       # random current state
+    actions = np.where(R_goal[s] > 0)[0]  # valid actions
+    if actions.size == 0:
+        continue
+    a = rng.choice(actions)               # random valid action
+    s_next = labels.index(NEXT[s, a])     # transition to the next state
+    TD = R_goal[s, a] + gamma * Q[s_next].max() - Q[s, a]
+    Q[s, a] += alpha * TD
+
+print("Q matrix:")
+print(Q)
+
+print("Path E -> G: ", " -> ".join(best_path('E', 'G')))
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4b59c127cce7235124c0e80fb8583b357e7de220
GIT binary patch
literal 206
zcmZvV!3x4K5Jca(&`+_XDIy;Hm_mgXY78WT&Bv=_8pJ|bb`LvmX5KHygBzZOi^kH>
z<*ljo?QsmcVWV1Bbs;ix7jwu|SX
ODJOgzBdPTCyUq`?SsrTu

literal 0
HcmV?d00001

diff --git a/sample.py b/sample.py
new file mode 100644
index 0000000..6d50275
--- /dev/null
+++ b/sample.py
@@ -0,0 +1,68 @@
+import numpy as np
+import pandas as pd
+
+labels = list("ABCDEFGHIJKL")
+
+R = np.array([
+    [0,1,0,0,0,0,0,0,0,0,0,0], #A
+    [1,0,1,0,0,1,0,0,0,0,0,0], #B
+    [0,1,0,0,0,0,1,0,0,0,0,0], #C
+    [0,0,0,0,0,0,0,1,0,0,0,0], #D
+    [0,0,0,0,0,0,0,0,1,0,0,0], #E
+    [0,1,0,0,0,0,0,0,0,1,0,0], #F
+    [0,0,1,0,0,0,0,1,0,0,0,0], #G
+    [0,0,0,1,0,0,1,0,0,0,0,1], #H
+    [0,0,0,0,1,0,0,0,0,1,0,0], #I
+    [0,0,0,0,0,1,0,0,1,0,1,0], #J
+    [0,0,0,0,0,0,0,0,0,1,0,1], #K
+    [0,0,0,0,0,0,0,1,0,0,1,0], #L
+], dtype=float)
+
+# Matrix of zeros with the same shape as R
+Q = np.zeros_like(R)
+
+print(type(R))     # <class 'numpy.ndarray'>
+print(R.ndim)      # 2 -> 2-D matrix
+print(R.shape)     # (12, 12) -> 12 rows, 12 columns
+print(R.dtype)     # float64
+print(R.size)      # 144 elements
+print(R.strides)   # (96, 8) -> byte steps to the next row / next column
+
+# Hyperparameters
+
+gamma = 0.75
+alpha = 0.90
+n_iters = 1000
+
+rng = np.random.default_rng(0)
+
+# Train Q-Learning for goal 'G'
+goal_label = 'G'
+goal = labels.index(goal_label)
+R_goal = R.copy()
+R_goal[goal, goal] = 1000.0
+
+for _ in range(n_iters):
+    s = rng.integers(0, R.shape[0])       # random current state
+    actions = np.where(R_goal[s] > 0)[0]  # valid actions
+    if actions.size == 0:
+        continue
+    a = rng.choice(actions)               # random valid action
+    s_next = a                            # here, the action index is the next state
+    # Temporal difference
+    TD = R_goal[s, a] + gamma * Q[s_next].max() - Q[s, a]
+    # Bellman / Q-learning update
+    Q[s, a] += alpha * TD
+
+
+def best_path(start_label: str, goal_label: str):
+    s = labels.index(start_label)
+    g = labels.index(goal_label)
+    path = [start_label]
+    while s != g:
+        s = np.argmax(Q[s])  # the best action here is directly the next state
+        path.append(labels[s])
+    return path
+
+
+print("Path E -> G: ", " -> ".join(best_path('E', 'G')))
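To see why the greedy walk over Q recovers the shortest route, here is a minimal, self-contained sketch of the same temporal-difference update on a tiny 3-node chain A - B - C. It is an illustrative toy example, separate from the committed files, and every name in it is hypothetical:

```python
import numpy as np

# Toy chain A - B - C; moving onto the goal C is heavily rewarded,
# mirroring how R_goal boosts the goal-directed actions in the scripts above.
labels = list("ABC")
R = np.array([
    [0, 1,   0],   # A can move to B
    [1, 0, 100],   # B can move to A, or to the goal C (rewarded)
    [0, 1, 100],   # C can move to B, or stay on the goal
], dtype=float)

Q = np.zeros_like(R)
gamma, alpha = 0.75, 0.90
rng = np.random.default_rng(0)

for _ in range(500):
    s = rng.integers(0, R.shape[0])      # random current state
    actions = np.where(R[s] > 0)[0]      # valid moves from s
    a = rng.choice(actions)              # random valid move
    s_next = a                           # action index == next state, as in sample.py
    TD = R[s, a] + gamma * Q[s_next].max() - Q[s, a]   # temporal difference
    Q[s, a] += alpha * TD                # Q-learning update

print(np.round(Q, 1))
# Q[A -> B] converges below Q[B -> C]: each extra step from the goal scales the
# propagated value by gamma, so following argmax(Q[s]) walks the shortest route.
```

The same reasoning carries over to the 12-state warehouse: states closer to G accumulate larger Q-values, which is what both scripts rely on when they follow argmax over Q in best_path.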