diff --git a/drone/__pycache__/drone_env.cpython-311.pyc b/drone/__pycache__/drone_env.cpython-311.pyc
index 9ad793b14e6ee6e49c10bfb224e26dade36ddac3..0271c1870278ae0f27a92ccd283dc1f7d3faff37 100644
Binary files a/drone/__pycache__/drone_env.cpython-311.pyc and b/drone/__pycache__/drone_env.cpython-311.pyc differ
diff --git a/mavion/__pycache__/mavion_env.cpython-311.pyc b/mavion/__pycache__/mavion_env.cpython-311.pyc
index 2a9bdc5489891a76c2de6e78b6aa807c5218e45b..435e0f3aba480aad7c3a3c015776c7db12a0a6bc 100644
Binary files a/mavion/__pycache__/mavion_env.cpython-311.pyc and b/mavion/__pycache__/mavion_env.cpython-311.pyc differ
diff --git a/mavion/__pycache__/mavion_model.cpython-311.pyc b/mavion/__pycache__/mavion_model.cpython-311.pyc
index 24e16ea710566fb8fa11f9046b2484f6a0219c2b..f441af6fb0fcd84230ee54145cbb9c44dc100361 100644
Binary files a/mavion/__pycache__/mavion_model.cpython-311.pyc and b/mavion/__pycache__/mavion_model.cpython-311.pyc differ
diff --git a/mavion/main.py b/mavion/main.py
index 6e2960906c94b9351529f7e4ea51faa270cd3fc6..89fb6b538c99c9db154e2660e76c09039703cf10 100644
--- a/mavion/main.py
+++ b/mavion/main.py
@@ -19,12 +19,14 @@ def uramp(t, t0) :
 if __name__ == "__main__":
     mavion_env = MavionEnv()
 
-    vh = 0
+    vh = 10
     vz = 0
     cap = 0
     omega = 0
 
-    dx, de, theta = mavion_env.mavion.trim(vh, vz)
+    target = [vh, vz]
+
+    dx, de, theta = mavion_env.mavion.trim(target)
     theta = (theta + np.pi) % (2*np.pi) - np.pi
     print(dx, de, theta*57.3)
 
diff --git a/mavion/mavion.yaml b/mavion/mavion.yaml
index 63ef25c774aa46d725b1d894e07d057ea883ecab..9782da1a57e6a467e9f5ab979eb5961f2426b832 100644
--- a/mavion/mavion.yaml
+++ b/mavion/mavion.yaml
@@ -22,3 +22,5 @@ ELEVON_MEFFICIENCY : [0, 0.66, 0]
 ELEVON_FEFFICIENCY : [0, 0.33, 0] 
 PROP_KP : 7.84e-06 
 PROP_KM : 2.400e-7     
+MAX_ROTOR_SPD : 2500  # rad/s
+MAX_FLAP_DEF :  1     # rad
diff --git a/mavion/mavion_env.py b/mavion/mavion_env.py
index e7ad537ed17b72f382da1c2bb24423bb2671cccd..1b2b72ed02ff5dc918feead797e83abf7abc24dd 100644
--- a/mavion/mavion_env.py
+++ b/mavion/mavion_env.py
@@ -4,11 +4,14 @@ from gymnasium.envs.classic_control import utils
 from mavion_model import Mavion
 from rotation import *
 
+
+deg2rad = np.pi / 180
+
 class MavionEnv(gym.Env):
 
     def __init__(self, mavion=Mavion(), render_mode=None):
 
-        self.tau = 0.02  # seconds between state updates
+        self.tau = 0.2  # seconds between state updates
 
         # Angle at which to fail the episode
         self.pos_threshold = np.array([2.4, 2.4, 2.4])
@@ -28,29 +31,40 @@ class MavionEnv(gym.Env):
             dtype=np.float32,
         )
         self.observation_space = gym.spaces.Box(-high, high, dtype=np.float32)
-        self.action_space = gym.spaces.Box(0, 1, shape=(4,), dtype=np.float32)
-        self.target = np.zeros(13)
+        self.action_space = gym.spaces.Box(-1, 1, shape=(4,), dtype=np.float32)
+        self.action_range = np.array([mavion.MAX_ROTOR_SPD, mavion.MAX_ROTOR_SPD, mavion.MAX_FLAP_DEF, mavion.MAX_FLAP_DEF])
+
         self.state = np.zeros(13)
-        self.obs = np.zeros(13)
+        self.target = np.array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
+        self.obs = self.state - self.target
         self.steps_beyond_terminated = None
         self.mavion = mavion
 
     def reset(self, seed=None, options=None):
         super().reset(seed=seed)
-        low, high = utils.maybe_parse_reset_bounds(options, -0.05, 0.05) 
-        self.target = np.zeros(13)
-        self.obs = self.np_random.uniform(low=low, high=high, size=(13,))
-        self.state = self.obs + self.target
+        self.obs = np.zeros(13)
+        low, high = utils.maybe_parse_reset_bounds(options, -0.1, 0.1) 
+        self.obs[0:3] = self.np_random.uniform(low=low, high=high, size=(3,))
+        self.state = self.target + self.obs
         self.steps_beyond_terminated = None
         return self.obs, {}
 
+    def distance(self, s, t):
+        return np.linalg.norm(t[0:3] - s[0:3])
+
+    def cost(self, s, u, t):
+        u_trim = self.mavion.trim(t)
+        s_trim = self.mavion.trim_state(t)
+        J = (s - s_trim).T@Q@(s - s_trim) + (u - u_trim).T@Q@(u - u_trim)
+        return J
+
     def step(self, action):
         x = self.state
-        u = action
+        u = action * self.action_range
         w = np.zeros(3)
         new_state = self.mavion.step(x, u, w, self.tau)
         self.state = new_state
-        terminated = np.array_equal(self.state, self.target)
-        reward = 1 if terminated else 0  # Binary sparse rewards
+        terminated = self.distance(self.state, self.target) > 10
+        reward = 1 if not terminated else -10
         self.obs = self.state - self.target
         return self.obs, reward, terminated, False, {}
diff --git a/mavion/mavion_model.py b/mavion/mavion_model.py
index 4bc758abf25e538435fbf9c365228baf93c1325b..fe31db5e871ff63134208556c2a5c215ca13b067 100644
--- a/mavion/mavion_model.py
+++ b/mavion/mavion_model.py
@@ -28,23 +28,26 @@ class Mavion:
 
         self.ELEVON_MEFFICIENCY = np.array(drone_data['ELEVON_MEFFICIENCY'])
         self.ELEVON_FEFFICIENCY = np.array(drone_data['ELEVON_FEFFICIENCY'])
-        self.PHI_n = drone_data["PHI_n"]
+        self.MAX_ROTOR_SPD = drone_data['MAX_ROTOR_SPD']
+        self.MAX_FLAP_DEF = drone_data['MAX_FLAP_DEF']
 
         # geometric parameters
-        self.P_P1_CG = np.array(drone_data["P_P1_CG"]) * self.CHORD
-        self.P_P2_CG = np.array(drone_data["P_P2_CG"]) * self.CHORD
-        self.P_A1_CG = np.array(drone_data["P_A1_CG"]) * self.CHORD
-        self.P_A2_CG = np.array(drone_data["P_A2_CG"]) * self.CHORD
+        self.P_P1_CG = np.array(drone_data["P_P1_CG"]) * np.array([self.CHORD, self.WINGSPAN, 0])
+        self.P_P2_CG = np.array(drone_data["P_P2_CG"]) * np.array([self.CHORD, self.WINGSPAN, 0])
+        self.P_A1_CG = np.array(drone_data["P_A1_CG"]) * np.array([self.CHORD, self.WINGSPAN, 0])
+        self.P_A2_CG = np.array(drone_data["P_A2_CG"]) * np.array([self.CHORD, self.WINGSPAN, 0])
 
+        self.PHI_n = drone_data["PHI_n"]
         self.Cd0 = 0.1
         self.Cy0 = 0.1
         self.dR = -0.1 * self.CHORD
         self.PHI_fv = np.diag([self.Cd0, self.Cy0, (2 * np.pi + self.Cd0)])
         self.PHI_mv = np.array([[0, 0, 0], 
-            [0, 0, - self.dR * (2 * np.pi + self.Cd0) / self.CHORD], 
+            [0, 0, -self.dR * (2 * np.pi + self.Cd0) / self.CHORD], 
             [0, self.dR * self.Cy0 / self.WINGSPAN, 0]])
-        self.PHI_mw = 1 / 2 * np.diag([0.5, 0.5, 0.5])
-        self.PHI = np.block([[self.PHI_fv, self.PHI_mv.T], [self.PHI_mv, self.PHI_mw]])
+        self.PHI_fw = self.PHI_mv.T
+        self.PHI_mw = 1/2 * np.diag([0.5, 0.5, 0.5])
+
 
     def thrust(self, dx):
         kp = self.PROP_KP
@@ -55,9 +58,10 @@ class Mavion:
         N = np.sign(dx) * km * dx**2 * np.array([1, 0, 0])
         return np.array([T, N])
 
-    def aero(self, quat, vel, rot, T, de, w):
+    def aero(self, vinf, rot, T, de):
         # data extraction from self struct
         PHI_fv = self.PHI_fv
+        PHI_fw = self.PHI_fw
         PHI_mv = self.PHI_mv
         PHI_mw = self.PHI_mw
         PHI_n  = self.PHI_n
@@ -77,26 +81,22 @@ class Mavion:
         # computation of chord matrix
         B = np.array([ws, chord, ws])
 
-        # freestream velocity computation in body frame
-        vinf = quatrot(vel - w, quat)
         # eta computation
         eta = np.sqrt(vinf@vinf + PHI_n * (B*rot)@(B*rot))
         qv = 1/2*RHO*S*eta
 
         # Force computation
         # airfoil contribution
-        Fb_a = -qv * (PHI_fv@vinf + PHI_mv@(B*rot)) - 1/2*Swet/Sp * PHI_fv@T
+        Fb_a = -qv * (PHI_fv@vinf + PHI_fw@(B*rot)) - 1/2 * Swet/Sp * PHI_fv@T
         # elevon contribution
-        Fb_e = qv * (PHI_fv@np.cross(de*Thetaf, vinf) + PHI_mv@(B*np.cross(de*Thetaf, rot))) \
-                + 1/2*Swet/Sp * PHI_fv@np.cross(de*Thetaf, T)
+        Fb_e = qv * (PHI_fv@np.cross(Thetaf*de, vinf) + PHI_fw@(np.cross(Thetaf*de, B*rot)))  + 1/2 * Swet/Sp * PHI_fv@np.cross(Thetaf*de, T)
         Fb = Fb_a + Fb_e
 
         # Moment computation
         # airfoil contribution
-        Mb_a = -qv * (B*(PHI_mv@vinf) + B*(PHI_mw@(B*rot))) - 1/2*Swet/Sp * B*(PHI_mv@T)
+        Mb_a = -qv * B*(PHI_mv@vinf + PHI_mw@(B*rot)) - 1/2 * Swet/Sp * B*(PHI_mv@T)
         # elevon contribution
-        Mb_e = qv * (B*(PHI_mv@np.cross(de*Thetam, vinf)) + B*(PHI_mw@(B*np.cross(de*Thetam, rot)))) \
-                + 1/2*Swet/Sp * B*(PHI_mv@np.cross(de*Thetam, T))
+        Mb_e = qv * B*(PHI_mv@np.cross(Thetam*de, vinf) + PHI_mw@(np.cross(Thetam*de, B*rot))) + 1/2 * Swet/Sp * B*(PHI_mv@np.cross(Thetam*de, T))
         Mb = Mb_a + Mb_e
 
         return np.array([Fb, Mb])
@@ -124,29 +124,26 @@ class Mavion:
 
         [T1, N1] = self.thrust(u[0])
         [T2, N2] = self.thrust(u[1])
-        [F1, M1] = self.aero(quat, vel, rot, T1, u[2], w)
-        [F2, M2] = self.aero(quat, vel, rot, T2, u[3], w)
+        tau1 = N1 - (rot[0] + u[0]) * (INERTIA_PROP_X - INERTIA_PROP_N) * np.array([0, rot[2], -rot[1]])
+        tau2 = N2 - (rot[0] + u[1]) * (INERTIA_PROP_X - INERTIA_PROP_N) * np.array([0, rot[2], -rot[1]])
 
-        Fb = T1 + T2 + F1 + F2
-        dvdt = np.array([0, 0, G]) + 1/MASS * quatrot(Fb, quatinv(quat))
+        vinf = quatrot(vel - w, quat)
+        [F1, M1] = self.aero(vinf, rot, T1, u[2])
+        [F2, M2] = self.aero(vinf, rot, T2, u[3])
 
-        # angular velocity derivative
-        Ma = M1 + M2 + np.cross(P_A1_CG, F1) + np.cross(P_A2_CG, F2)
+        Fb = T1 + F1 + T2 + F2
+        dvdt = np.array([0, 0, G]) + 1/MASS * quatrot(Fb, quatinv(quat))
 
-        tau1 = N1 - (rot[0] + u[0]) * (INERTIA_PROP_X - INERTIA_PROP_N) * np.array([0, rot[2], -rot[1]])
-        tau2 = N2 - (rot[0] + u[1]) * (INERTIA_PROP_X - INERTIA_PROP_N) * np.array([0, rot[2], -rot[1]])
-        Mg = tau1 + tau2 + np.cross(P_P1_CG, T1) + np.cross(P_P2_CG, T2)
-        
-        Mb = Ma + Mg
+        Mb = M1 + tau1 + np.cross(P_A1_CG, F1) + np.cross(P_P1_CG, T1) + M2 + tau2 + np.cross(P_A2_CG, F2) + np.cross(P_P2_CG, T2) 
         drdt = np.linalg.inv(INERTIA) @ (Mb - np.cross(rot, INERTIA@rot))
 
-        # kinematic equations
         dpdt = vel
         dqdt = 1/2 * quatmul(quat, np.concatenate([[0], rot]))
 
         return np.concatenate([dpdt, dqdt, dvdt, drdt])
 
-    def trim(self, vh, vz):
+    def trim(self, target):
+        vh, vz = target
         x = np.zeros(13)
         x[7] = vh
         x[9] = vz
diff --git a/mavion/sac_mavion_1.zip b/mavion/sac_mavion_1.zip
new file mode 100644
index 0000000000000000000000000000000000000000..c6f32c49fc5f05c1f750a783e2adb0c6ee250302
Binary files /dev/null and b/mavion/sac_mavion_1.zip differ
diff --git a/mavion/train.py b/mavion/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..ecb71f3429e03bacef074816ee4a997f6fa1cc3a
--- /dev/null
+++ b/mavion/train.py
@@ -0,0 +1,14 @@
+import numpy as np
+from stable_baselines3 import SAC
+from mavion_env import MavionEnv
+
+
+if __name__ == "__main__":
+
+    env = MavionEnv()
+    env.target = np.array([0, 0, -20, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
+
+    model = SAC("MlpPolicy", env, verbose=1)
+    model.learn(total_timesteps=20000, log_interval=10)
+    model.save("sac_mavion_1")
+