diff --git a/biblio/2017_Ribeiro_Lustosa_Leandro_D.pdf b/biblio/2017_Ribeiro_Lustosa_Leandro_D.pdf
deleted file mode 100644
index 06308e0feb4ee121dce653d82376c668a53578ba..0000000000000000000000000000000000000000
Binary files a/biblio/2017_Ribeiro_Lustosa_Leandro_D.pdf and /dev/null differ
diff --git a/biblio/MPC in Flight.pdf b/biblio/MPC in Flight.pdf
deleted file mode 100644
index 2e239d6d801ef56d50b0678a6095d0d25a93e09a..0000000000000000000000000000000000000000
Binary files a/biblio/MPC in Flight.pdf and /dev/null differ
diff --git a/biblio/ScienceRobotics22_Foehn.pdf b/biblio/ScienceRobotics22_Foehn.pdf
deleted file mode 100644
index 27774e49c886522c6e6c30df96566c68fb37eeac..0000000000000000000000000000000000000000
Binary files a/biblio/ScienceRobotics22_Foehn.pdf and /dev/null differ
diff --git a/biblio/TD-MPC-2.pdf b/biblio/TD-MPC-2.pdf
deleted file mode 100644
index 490dbce661e584c58180dfe1626820885b196612..0000000000000000000000000000000000000000
Binary files a/biblio/TD-MPC-2.pdf and /dev/null differ
diff --git a/biblio/hopwood-et-al-2024-development-and-evaluation-of-multirotor-flight-dynamic-models-for-estimation-and-control.pdf b/biblio/hopwood-et-al-2024-development-and-evaluation-of-multirotor-flight-dynamic-models-for-estimation-and-control.pdf
deleted file mode 100644
index 1dee5a6473c2609e9f64d0e466e7241822640216..0000000000000000000000000000000000000000
Binary files a/biblio/hopwood-et-al-2024-development-and-evaluation-of-multirotor-flight-dynamic-models-for-estimation-and-control.pdf and /dev/null differ
diff --git a/drone/__pycache__/drone_env.cpython-311.pyc b/drone/__pycache__/drone_env.cpython-311.pyc
index 4fbfcf59f7b588a04139d6fc6c1f88b32cbc0a54..ee1263824c82217fc2e0f2aa1152d41c947b19c7 100644
Binary files a/drone/__pycache__/drone_env.cpython-311.pyc and b/drone/__pycache__/drone_env.cpython-311.pyc differ
diff --git a/drone/__pycache__/drone_model.cpython-311.pyc b/drone/__pycache__/drone_model.cpython-311.pyc
index 8ed2baa184362bf487378913b8d226fb07a0ec34..386e74f5fd3a2ba65d4950d7155b9b4f5d7ad453 100644
Binary files a/drone/__pycache__/drone_model.cpython-311.pyc and b/drone/__pycache__/drone_model.cpython-311.pyc differ
diff --git a/drone/__pycache__/rotation.cpython-311.pyc b/drone/__pycache__/rotation.cpython-311.pyc
index 0242eab78cf10c159fbcfa00024125c5ba7a0a73..91e15f300fe3a866732391f70ef6bb67bc669fad 100644
Binary files a/drone/__pycache__/rotation.cpython-311.pyc and b/drone/__pycache__/rotation.cpython-311.pyc differ
diff --git a/drone/drone_env.py b/drone/drone_env.py
index fc33374f8089283f0e808292c372c9a8c4a65f7c..072ff90e36e270cc449bfbd409077bdd104434e1 100644
--- a/drone/drone_env.py
+++ b/drone/drone_env.py
@@ -2,11 +2,9 @@ import numpy as np
 import gymnasium as gym
 from gymnasium.envs.classic_control import utils
 
+from rotation import quat2eul, deg2rad, rad2deg, quatmul, quatinv
 from drone_model import Drone
-from rotation import quat2eul
 
-deg2rad = np.pi/180
-rad2deg = 1 / deg2rad
 
 class DroneEnv(gym.Env):
 
@@ -31,22 +29,23 @@ class DroneEnv(gym.Env):
             dtype=np.float32,
         )
         self.observation_space = gym.spaces.Box(-high, high, dtype=np.float32)
-        self.action_space = gym.spaces.Box(0, 1, shape=(4,), dtype=np.float32)
+        self.action_space = gym.spaces.Box(0, 500, shape=(4,), dtype=np.float32)
         self.state = np.zeros(13)
+        self.target = np.zeros(13)
         self.obs = np.zeros(12)
-        self.target = np.zeros(12)
         self.steps_beyond_terminated = None
 
     def observation(self):
         pos = self.state[0:3]
+        quat = self.state[3:7]
         vel = self.state[7:10]
         rot = self.state[10:13]
-        quat = self.state[3:7]
-        ang = quat2eul(quat)
         d_pos = pos - self.target[0:3]
-        d_ang = ((ang - self.target[3:6]) + np.pi) % (2*np.pi) - np.pi
-        d_vel = vel - self.target[6:9]
-        d_rot = rot - self.target[9:12]
+        d_quat = quatmul(quat, quatinv(self.target[3:7]))
+        d_vel = vel - self.target[7:10]
+        d_rot = rot - self.target[10:13]
+        
+        d_ang = quat2eul(d_quat)
         obs = np.concatenate([d_pos, d_ang, d_vel, d_rot])
         return obs
 
@@ -55,8 +54,10 @@ class DroneEnv(gym.Env):
         # Note that if you use custom reset bounds, it may lead to out-of-bound
         # state/observations.
         low, high = utils.maybe_parse_reset_bounds(
-            options, -0.05, 0.05  # default low
-        )  # default high
+            options, 
+            -0.05, # default low
+             0.05  # default high 
+        )  
         # self.state = self.np_random.uniform(low=low, high=high, size=(13,))
         self.state = np.array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
         self.steps_beyond_terminated = None
@@ -67,6 +68,9 @@ class DroneEnv(gym.Env):
         new_state = self.drone.step(self.state, action, self.tau)
         self.state = new_state
         self.obs = self.observation()
-        terminated = (self.obs@self.obs < 1)
-        reward = 1 if terminated else 0  # Binary sparse rewards
+        terminated = bool((self.obs[0:3] < -self.pos_threshold).any()  or (self.obs[0:3] > self.pos_threshold).any())
+        reward = 1 if not terminated else 0  # Binary sparse rewards
         return self.obs, reward, terminated, False, {}
+
+    def render(self, mode='human'):
+        pass
diff --git a/drone/drone_model.py b/drone/drone_model.py
index aeb66a13b68a4b234add01a8203e48c8691ba7c1..696600186984af912607f9b25dd45991a97e369f 100644
--- a/drone/drone_model.py
+++ b/drone/drone_model.py
@@ -1,7 +1,8 @@
 import numpy as np
+import yaml
 from scipy.optimize import fsolve
 from scipy.integrate import solve_ivp
-import yaml
+
 from rotation import quatrot, quatmul
 
 class Drone:
diff --git a/drone/main.py b/drone/main.py
index 666c370d44742cf99e908d29f28bb439ef9f26f4..753443a3f492c4f8b4f06c743dcf601fde4054a3 100644
--- a/drone/main.py
+++ b/drone/main.py
@@ -1,5 +1,6 @@
 import numpy as np
 import matplotlib.pyplot as plt
+
 from drone_model import Drone
 from drone_env import DroneEnv
 
@@ -21,21 +22,16 @@ def uramp(t, t0) :
 
 if __name__ == "__main__":
     drone = Drone()
+    drone_env = DroneEnv(drone)
 
     vh = 0
     vz = 0
     n0 = drone.trim(vh, vz)
     print(n0)
 
-    drone_env = DroneEnv(drone)
-    drone_env.target = np.array([0, 0, -20, 0, 0, 0, 0, 0, 0, 0, 0, 0])
-    obs0 = drone_env.reset()[0]
+    obs = drone_env.reset()
     x0 = drone_env.state
-    print(drone_env.state)
-    obs1 = drone_env.step(n0)
-    print(obs0)
-    print(drone_env.state)
-
+    
 
     def ctl(t, s):
         dn = (upulse(t, 1, 2) - upulse(t, 2, 3))*np.linalg.norm(n0)*np.array([0.01, 0.01, 0.01, 0.01])
diff --git a/drone/rotation.py b/drone/rotation.py
index 563c8f4d59fa06f7533be4b73325cf976578069f..29b59136f1bce87d2c25171b1bbe8a8613b039f3 100644
--- a/drone/rotation.py
+++ b/drone/rotation.py
@@ -3,7 +3,6 @@ import numpy as np
 deg2rad = np.pi/180
 rad2deg = 1 / deg2rad
 
-
 def hat(x):
     return np.array([[0, -x[2], x[1]], [x[2], 0, -x[0]], [-x[1], x[0], 0]])
 
diff --git a/drone/sac_mavion_1.zip b/drone/sac_mavion_1.zip
new file mode 100644
index 0000000000000000000000000000000000000000..786922455c060dda88ba67acc49787dd5efca298
Binary files /dev/null and b/drone/sac_mavion_1.zip differ
diff --git a/drone/sac_mavion_2.zip b/drone/sac_mavion_2.zip
new file mode 100644
index 0000000000000000000000000000000000000000..9ce2ae897ade8f300f94fe8babf22c75a0dab9e4
Binary files /dev/null and b/drone/sac_mavion_2.zip differ
diff --git a/drone/test_quat.py b/drone/test_quat.py
new file mode 100644
index 0000000000000000000000000000000000000000..425c25dd0df05464213e4f8657f5fe7565d36b8e
--- /dev/null
+++ b/drone/test_quat.py
@@ -0,0 +1,14 @@
+import numpy as np
+from rotation import *
+
+eul1 = np.array([0, 10, 0]) * deg2rad
+eul2 = np.array([10, 0, 0]) * deg2rad
+
+q1 = eul2quat(eul1)
+q2 = eul2quat(eul2)
+
+d_q = quatmul(q1, quatinv(q2))
+print(q1, q2, d_q)
+
+d_eul = quat2eul(d_q) * rad2deg
+print(d_eul)
\ No newline at end of file
diff --git a/drone/train.py b/drone/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..ab357f2d9b7c9b66e6eafb0c8f573b0926c77d34
--- /dev/null
+++ b/drone/train.py
@@ -0,0 +1,18 @@
+import numpy as np
+from stable_baselines3 import SAC
+
+from drone_model import Drone
+from drone_env import DroneEnv
+
+
+if __name__ == "__main__":
+
+    drone = Drone()
+    drone_env = DroneEnv(drone)
+
+    drone_env.target = np.array([1, 1, -20, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
+
+    model = SAC("MlpPolicy", drone_env, verbose=1)
+    model.learn(total_timesteps=20000, log_interval=10)
+    model.save("sac_mavion_2")
+
diff --git a/mavion/__pycache__/mavion_model.cpython-311.pyc b/mavion/__pycache__/mavion_model.cpython-311.pyc
index 36b589965e84da4a609c7f9fc9026c6e1ee8bafd..24e16ea710566fb8fa11f9046b2484f6a0219c2b 100644
Binary files a/mavion/__pycache__/mavion_model.cpython-311.pyc and b/mavion/__pycache__/mavion_model.cpython-311.pyc differ
diff --git a/mavion/__pycache__/rotation.cpython-311.pyc b/mavion/__pycache__/rotation.cpython-311.pyc
index 749ea808773963bcc6e585f5880ffabfbb892714..d04dfdc4071ab3d8dad6f839418d756132544fdb 100644
Binary files a/mavion/__pycache__/rotation.cpython-311.pyc and b/mavion/__pycache__/rotation.cpython-311.pyc differ
diff --git a/mavion/main.py b/mavion/main.py
index 69944de72b5b8de5e783ac2eb63419bd0a3c96b2..55216576be88be37e12c161146cbc4384db98820 100644
--- a/mavion/main.py
+++ b/mavion/main.py
@@ -34,14 +34,14 @@ if __name__ == "__main__":
     quat = eul2quat(ang)
 
     x0 = np.concatenate([pos, quat, vel, rot])
-    print(mavion.equi(x0))
+    u0 = np.array([-dx, dx, de, de])
 
-    def ctl(t, s, w):
+    def ctl(t, s):
         ddx = (upulse(t, 5, 6) - upulse(t, 6, 7))*dx*0.1
         qt = np.array([1/np.sqrt(2), 0, 1/np.sqrt(2), 0])
         q = s[3:7]
-        dde = 1.0 * (q - qt)[2]
-        u = np.array([-dx-ddx, dx+ddx, de+dde, de+dde])
+        dde = 0.0 * (q - qt)[2]
+        u = u0 + np.array([-ddx, ddx, dde, dde])
         return u
 
     def wnd(t, s):
@@ -60,11 +60,11 @@ if __name__ == "__main__":
 
     ax = axs[1][0]
     quat = sim.y[3:7].T
-    ax.plot(sim.t, quat, label=('q0', 'q1', 'q2', 'q3'))
-    ax.set_title(label='quaternion')
-    # eul = np.apply_along_axis(quat2eul, 1, sim.y[3:7].T)*57.3
-    # ax.plot(sim.t, eul, label=('phi', 'theta', 'psi'))
-    # ax.set_title(label='euler angles')
+    # ax.plot(sim.t, quat, label=('q0', 'q1', 'q2', 'q3'))
+    # ax.set_title(label='quaternion')
+    eul = np.apply_along_axis(quat2eul, 1, quat) * rad2deg
+    ax.plot(sim.t, eul, label=('phi', 'theta', 'psi'))
+    ax.set_title(label='euler angles')
     ax.legend()
     ax.grid()
 
diff --git a/mavion/mavion_env.py b/mavion/mavion_env.py
index 967595915b339160d1c032700d3c3d7667048fc9..b3a93c6d5f4646abdf7715482ab4901467a937c5 100644
--- a/mavion/mavion_env.py
+++ b/mavion/mavion_env.py
@@ -8,61 +8,47 @@ class MavionEnv(gym.Env):
     def __init__(self, mavion=Mavion(), render_mode=None):
 
         self.tau = 0.02  # seconds between state updates
-        
+
         # Angle at which to fail the episode
         self.pos_threshold = np.array([2.4, 2.4, 2.4])
         self.ang_threshold = np.array([15, 15, 15]) * deg2rad
+        self.quat_threshold = np.array([1, 1, 1, 1])
         self.vel_threshold = np.finfo(np.float32).max * np.ones(3)
         self.rot_threshold = np.finfo(np.float32).max * np.ones(3)
 
         high = np.concatenate(
             [
                 self.pos_threshold * 2,
-                self.ang_threshold * 2,
+                # self.ang_threshold * 2,
+                self.quat_threshold,
                 self.vel_threshold,
                 self.rot_threshold
             ],
             dtype=np.float32,
         )
         self.observation_space = gym.spaces.Box(-high, high, dtype=np.float32)
-
         self.action_space = gym.spaces.Box(0, 1, shape=(4,), dtype=np.float32)
-        
-        self.target = None
-        self.obs = None
+        self.target = np.zeros(13)
+        self.state = np.zeros(13)
+        self.obs = np.zero(13)
         self.steps_beyond_terminated = None
-
         self.mavion = mavion
 
-    def obs(self):
-        Vh = np.linalg.norm(self.mavion.vel[0:2])
-        Vv = self.mavion.vel[2]
-        Cap = np.arctan2(self.mavion.vel[1], self.mavion.vel[0])
-        phi = self.mavion.ang[0]
-        Turn_rate = self.mavion.G / Vh * np.tan(phi)
-        return np.array([Vh, Vv, Cap, Turn_rate])
-
     def reset(self, seed=None, options=None):
         super().reset(seed=seed)
-
-        # Note that if you use custom reset bounds, it may lead to out-of-bound
-        # state/observations.
-        low, high = utils.maybe_parse_reset_bounds(
-            options, -0.05, 0.05  # default low
-        )  # default high
+        low, high = utils.maybe_parse_reset_bounds(options, -0.05, 0.05) 
         self.target = np.zeros(12)
         self.obs = self.np_random.uniform(low=low, high=high, size=(12,))
         self.state = self.obs + self.target
         self.steps_beyond_terminated = None
-
         return self.obs, {}
 
     def step(self, action):
-        x = s2q(self.state)
+        x = self.state
         u = action
         w = np.zeros(3)
         new_state = self.mavion.step(x, u, w, self.tau)
-        self.state = q2s(new_state)
+        self.state = new_state
         terminated = np.array_equal(self.state, self.target)
         reward = 1 if terminated else 0  # Binary sparse rewards
         self.obs = self.state - self.target
diff --git a/mavion/mavion_model.py b/mavion/mavion_model.py
index 11d35c06229d1e4cf38a30cff4ebfe80f4b17152..4bc758abf25e538435fbf9c365228baf93c1325b 100644
--- a/mavion/mavion_model.py
+++ b/mavion/mavion_model.py
@@ -36,23 +36,16 @@ class Mavion:
         self.P_A1_CG = np.array(drone_data["P_A1_CG"]) * self.CHORD
         self.P_A2_CG = np.array(drone_data["P_A2_CG"]) * self.CHORD
 
-
         self.Cd0 = 0.1
         self.Cy0 = 0.1
         self.dR = -0.1 * self.CHORD
-
         self.PHI_fv = np.diag([self.Cd0, self.Cy0, (2 * np.pi + self.Cd0)])
-
         self.PHI_mv = np.array([[0, 0, 0], 
             [0, 0, - self.dR * (2 * np.pi + self.Cd0) / self.CHORD], 
             [0, self.dR * self.Cy0 / self.WINGSPAN, 0]])
-
         self.PHI_mw = 1 / 2 * np.diag([0.5, 0.5, 0.5])
-
         self.PHI = np.block([[self.PHI_fv, self.PHI_mv.T], [self.PHI_mv, self.PHI_mw]])
 
-
-    
     def thrust(self, dx):
         kp = self.PROP_KP
         km = self.PROP_KM
@@ -63,7 +56,6 @@ class Mavion:
         return np.array([T, N])
 
     def aero(self, quat, vel, rot, T, de, w):
-
         # data extraction from self struct
         PHI_fv = self.PHI_fv
         PHI_mv = self.PHI_mv
@@ -80,48 +72,41 @@ class Mavion:
 
         # derivative data
         Sp = np.pi * Prop_R**2
-
-        # DCM computation
-        D = quat2dcm(quat)
-
-        # freestream velocity computation in body frame
-        vinf = D @ (vel - w)
-
         # computation of total wing section area
         S = Swet + Sdry
-
         # computation of chord matrix
-        B = np.diag([ws, chord, ws])
+        B = np.array([ws, chord, ws])
 
+        # freestream velocity computation in body frame
+        vinf = quatrot(vel - w, quat)
         # eta computation
-        eta = np.sqrt(vinf@vinf + PHI_n * (B@rot)@(B@rot))
+        eta = np.sqrt(vinf@vinf + PHI_n * (B*rot)@(B*rot))
+        qv = 1/2*RHO*S*eta
 
         # Force computation
         # airfoil contribution
-        Fb_a = -1/2*RHO*S*eta * PHI_fv@vinf - 1/2*RHO*S*eta * PHI_mv@B@rot - 1/2*Swet/Sp * PHI_fv@T
+        Fb_a = -qv * (PHI_fv@vinf + PHI_mv@(B*rot)) - 1/2*Swet/Sp * PHI_fv@T
         # elevon contribution
-        Fb_e = 1/2*RHO*S*eta * PHI_fv@np.cross(de*Thetaf, vinf) \
-                + 1/2*RHO*S*eta * PHI_mv@B@np.cross(de*Thetaf, rot) \
+        Fb_e = qv * (PHI_fv@np.cross(de*Thetaf, vinf) + PHI_mv@(B*np.cross(de*Thetaf, rot))) \
                 + 1/2*Swet/Sp * PHI_fv@np.cross(de*Thetaf, T)
         Fb = Fb_a + Fb_e
 
         # Moment computation
         # airfoil contribution
-        Mb_a = -1/2*RHO*S*eta * B@PHI_mv@vinf - 1/2*RHO*S*eta * B@PHI_mw@B@rot - 1/2*Swet/Sp * B@PHI_mv@T
+        Mb_a = -qv * (B*(PHI_mv@vinf) + B*(PHI_mw@(B*rot))) - 1/2*Swet/Sp * B*(PHI_mv@T)
         # elevon contribution
-        Mb_e = 1/2*RHO*S*eta * B@PHI_mv@np.cross(de*Thetam, vinf) \
-                + 1/2*RHO*S*eta * B@PHI_mw@B@np.cross(de*Thetam, rot) \
-                + 1/2*Swet/Sp * B@PHI_mv@np.cross(de*Thetam, T)
+        Mb_e = qv * (B*(PHI_mv@np.cross(de*Thetam, vinf)) + B*(PHI_mw@(B*np.cross(de*Thetam, rot)))) \
+                + 1/2*Swet/Sp * B*(PHI_mv@np.cross(de*Thetam, T))
         Mb = Mb_a + Mb_e
 
         return np.array([Fb, Mb])
 
+
     def dyn(self, x, u, w):
 
         G = self.G
         MASS = self.MASS
         INERTIA = self.INERTIA
-
         # position of propellers wrt center of gravity
         P_P1_CG = self.P_P1_CG
         P_P2_CG = self.P_P2_CG
@@ -137,16 +122,13 @@ class Mavion:
         vel = x[7:10]
         rot = x[10:13]
 
-        # linear velocity derivative
-        D = quat2dcm(quat)
-
         [T1, N1] = self.thrust(u[0])
         [T2, N2] = self.thrust(u[1])
         [F1, M1] = self.aero(quat, vel, rot, T1, u[2], w)
         [F2, M2] = self.aero(quat, vel, rot, T2, u[3], w)
 
         Fb = T1 + T2 + F1 + F2
-        dvdt = np.array([0, 0, G]) + 1/MASS * D.T@Fb
+        dvdt = np.array([0, 0, G]) + 1/MASS * quatrot(Fb, quatinv(quat))
 
         # angular velocity derivative
         Ma = M1 + M2 + np.cross(P_A1_CG, F1) + np.cross(P_A2_CG, F2)
@@ -164,13 +146,6 @@ class Mavion:
 
         return np.concatenate([dpdt, dqdt, dvdt, drdt])
 
-    def equi(self, x):
-        vel = x[7:10]
-        vh = np.linalg.norm(vel[0:2])
-        vz = vel[2]
-        cap = np.arctan2(vel[1], vel[0])
-        return np.array([vh, vz, cap])
-
     def trim(self, vh, vz):
         x = np.zeros(13)
         x[7] = vh
@@ -193,6 +168,9 @@ class Mavion:
         return sol.y.T[-1]
 
     def sim(self, t_span, x0, fctrl, fwind):
-        func = lambda t, s: self.dyn(s, fctrl(t, s, fwind(t, s)), fwind(t, s))
-        sol = solve_ivp(func, t_span, x0, method='RK45', max_step=0.01)
+        func = lambda t, s: self.dyn(s, fctrl(t, s), fwind(t, s))
+        def hit_ground(t, s): return -s[2]
+        hit_ground.terminal = True
+        hit_ground.direction = -1
+        sol = solve_ivp(func, t_span, x0, method='RK45', max_step=0.01, events=hit_ground)
         return sol
diff --git a/mavion/rotation.py b/mavion/rotation.py
index 5e5ea07dd985e50ded4ba4fde7d1436aea7698c4..25a1702a0ec1008df147849c52e6870e48734ed0 100644
--- a/mavion/rotation.py
+++ b/mavion/rotation.py
@@ -1,5 +1,8 @@
 import numpy as np
 
+deg2rad = np.pi/180
+rad2deg = 180/np.pi
+
 def hat(x):
     return np.array([[0, -x[2], x[1]], [x[2], 0, -x[0]], [-x[1], x[0], 0]])