diff --git a/mavion/__pycache__/mavion_env.cpython-311.pyc b/mavion/__pycache__/mavion_env.cpython-311.pyc
index 84e9fb241a4f32f92826bf9b05ff6fc6b79eed10..cbc9203e9109be9974532a6673e19dba4818769b 100644
Binary files a/mavion/__pycache__/mavion_env.cpython-311.pyc and b/mavion/__pycache__/mavion_env.cpython-311.pyc differ
diff --git a/mavion/__pycache__/mavion_model.cpython-311.pyc b/mavion/__pycache__/mavion_model.cpython-311.pyc
index 99265172a594f535caf625c15e36f1e649ca7995..da1a7717668e27a765b555539d9ed0cdbb20e3ef 100644
Binary files a/mavion/__pycache__/mavion_model.cpython-311.pyc and b/mavion/__pycache__/mavion_model.cpython-311.pyc differ
diff --git a/mavion/mavion_env.py b/mavion/mavion_env.py
index abf7fd0b1748d9ae151690c53e27efd1db8f24d5..fb76cc37de234f3d96a6440daad94ade3ab49285 100644
--- a/mavion/mavion_env.py
+++ b/mavion/mavion_env.py
@@ -34,18 +34,21 @@ class MavionEnv(gym.Env):
         self.action_range = np.array([mavion.MAX_ROTOR_SPD, mavion.MAX_ROTOR_SPD, mavion.MAX_FLAP_DEF, mavion.MAX_FLAP_DEF])
 
         self.state = np.zeros(13)
-        self.target = np.array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
+        self.target = np.zeros(13)
         self.steps_beyond_terminated = None
 
     def observation(self):
-        self.obs = self.state - self.target
-        self.obs[3:7] = quatmul(quatinv(self.state[3:7]), self.target[3:7])
+        obs = self.state - self.target
+        obs[3:7] = quatmul(quatinv(self.state[3:7]), self.target[3:7])
+        self.obs = obs
         return self.obs
 
     def distance(self, s, t):
-        dpos = np.linalg.norm(t[0:3] - s[0:3]) # distance euclidienne
-        dquat = 1 - abs(sum(t[3:7] * s[3:7]))  # compris entre 0 et 1
-        return dpos + dquat
+        d_pos = np.linalg.norm(t[0:3] - s[0:3]) # distance euclidienne
+        d_quat = 1 - abs(sum(t[3:7] * s[3:7]))  # compris entre 0 et 1
+        d_vel = np.linalg.norm(t[7:10] - s[7:10]) # distance euclidienne
+        d_rot = np.linalg.norm(t[10:13] - s[10:13]) # distance euclidienne
+        return d_pos + d_quat*10 + d_vel*0 + d_rot*0
 
     def reward(self):
         dist = self.distance(self.state, self.target)
@@ -57,15 +60,15 @@ class MavionEnv(gym.Env):
         x = self.state
         u = action * self.action_range
         self.state  = self.mavion.step(x, u, np.zeros(3), self.tau)
-        observation = self.observation()
+        self.obs = self.observation()
         reward, terminated = self.reward()
-        return observation, reward, terminated, False, {}
+        return self.obs, reward, terminated, False, {}
 
     def reset(self, seed=None, options=None):
         super().reset(seed=seed)
-        self.obs = np.zeros(13)
-        low, high = utils.maybe_parse_reset_bounds(options, -1, 1) 
-        self.obs[0:3] = self.np_random.uniform(low=low, high=high, size=(3,))
-        self.state = self.target + self.obs
+        low, high = utils.maybe_parse_reset_bounds(options, -1, 1)
+        delta = self.np_random.uniform(low=low, high=high, size=(3,))
+        self.state = self.target + np.concatenate([delta, np.zeros(10)])
+        self.obs = self.observation()
         self.steps_beyond_terminated = None
         return self.obs, {}
diff --git a/mavion/mavion_model.py b/mavion/mavion_model.py
index ded1e352c3f6d74d76521c3ce0fbd0023884f8d6..81a472b0e378355ab7db729c9fff259da5d587e6 100644
--- a/mavion/mavion_model.py
+++ b/mavion/mavion_model.py
@@ -121,7 +121,7 @@ class Mavion:
         quat = x[3:7]
         vel = x[7:10]
         rot = x[10:13]
-
+        
         [T1, N1] = self.thrust(-u[0])       
         [T2, N2] = self.thrust(u[1])
         tau1 = N1 - (rot[0] - u[0]) * (INERTIA_PROP_X - INERTIA_PROP_N) * np.array([0, rot[2], -rot[1]])
diff --git a/mavion/sac_mavion_1.zip b/mavion/sac_mavion_1.zip
index c6f32c49fc5f05c1f750a783e2adb0c6ee250302..8b99de1c21b5e3bfd6a57d15309a1b32651317ee 100644
Binary files a/mavion/sac_mavion_1.zip and b/mavion/sac_mavion_1.zip differ
diff --git a/mavion/train.py b/mavion/train.py
index 59e6d3a7ca37ee9899df6b9d0aca5fa59e8811b2..b3c16f988a479d614fc1f5e44c1afa0b04431592 100644
--- a/mavion/train.py
+++ b/mavion/train.py
@@ -1,12 +1,22 @@
 import numpy as np
 from stable_baselines3 import SAC
 from mavion_env import MavionEnv
+from rotation import *
 
 
 if __name__ == "__main__":
 
+    pos_target = np.array([0, 0, -20])
+    psi_target = 45
+    eul_target = np.array([0, 90, 45])*deg2rad
+    quat_target = eul2quat(eul_target)
+
     env = MavionEnv()
-    env.target = np.array([0, 0, -20, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
+    env.target = np.concatenate([pos_target, quat_target, np.zeros(6)])
+    print(env.target)
+    obs = env.reset()
+    print(env.state)
+    print(obs)
 
     model = SAC("MlpPolicy", env, verbose=1)
     model.learn(total_timesteps=20000, log_interval=10)
diff --git a/rotations/quaternion.ipynb b/rotations/quaternion.ipynb
index 718fc4e088e4adce66a743397343a8d1e1da47a0..8376d154a25f3ecd54adcc392c8dbd6c42eeeee6 100644
--- a/rotations/quaternion.ipynb
+++ b/rotations/quaternion.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -12,7 +12,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "s1 = np.array()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
@@ -37,7 +46,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [
     {
@@ -46,7 +55,7 @@
      "text": [
       "[0.         1.04719755 0.        ]\n",
       "[0.8660254 0.        0.5       0.       ] [0.8660254 0.        0.5       0.       ]\n",
-      "0.19989685480873443 0.6433291804340877 [-0.33141357  0.46193977 -0.19134172]\n"
+      "0.19989685480873443 0.6433291804340877 [ 0.80010315 -0.33141357  0.46193977 -0.19134172]\n"
      ]
     }
    ],
@@ -61,11 +70,12 @@
     "q2b = eul2quat(dcm2eul(R2))\n",
     "print(q2, q2b)\n",
     "\n",
+    "\n",
     "d1 = 1 - abs(sum(q1*q2))\n",
     "errq = quatmul(quatinv(q1),q2)\n",
     "d2 = np.arccos(errq[0])\n",
     "epsq = errq[1:4]\n",
-    "print(d1, d2, epsq)\n"
+    "print(d1, d2, errq)\n"
    ]
   }
  ],