diff --git a/mavion/__pycache__/mavion_env.cpython-311.pyc b/mavion/__pycache__/mavion_env.cpython-311.pyc index 84e9fb241a4f32f92826bf9b05ff6fc6b79eed10..cbc9203e9109be9974532a6673e19dba4818769b 100644 Binary files a/mavion/__pycache__/mavion_env.cpython-311.pyc and b/mavion/__pycache__/mavion_env.cpython-311.pyc differ diff --git a/mavion/__pycache__/mavion_model.cpython-311.pyc b/mavion/__pycache__/mavion_model.cpython-311.pyc index 99265172a594f535caf625c15e36f1e649ca7995..da1a7717668e27a765b555539d9ed0cdbb20e3ef 100644 Binary files a/mavion/__pycache__/mavion_model.cpython-311.pyc and b/mavion/__pycache__/mavion_model.cpython-311.pyc differ diff --git a/mavion/mavion_env.py b/mavion/mavion_env.py index abf7fd0b1748d9ae151690c53e27efd1db8f24d5..fb76cc37de234f3d96a6440daad94ade3ab49285 100644 --- a/mavion/mavion_env.py +++ b/mavion/mavion_env.py @@ -34,18 +34,21 @@ class MavionEnv(gym.Env): self.action_range = np.array([mavion.MAX_ROTOR_SPD, mavion.MAX_ROTOR_SPD, mavion.MAX_FLAP_DEF, mavion.MAX_FLAP_DEF]) self.state = np.zeros(13) - self.target = np.array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]) + self.target = np.zeros(13) self.steps_beyond_terminated = None def observation(self): - self.obs = self.state - self.target - self.obs[3:7] = quatmul(quatinv(self.state[3:7]), self.target[3:7]) + obs = self.state - self.target + obs[3:7] = quatmul(quatinv(self.state[3:7]), self.target[3:7]) + self.obs = obs return self.obs def distance(self, s, t): - dpos = np.linalg.norm(t[0:3] - s[0:3]) # distance euclidienne - dquat = 1 - abs(sum(t[3:7] * s[3:7])) # compris entre 0 et 1 - return dpos + dquat + d_pos = np.linalg.norm(t[0:3] - s[0:3]) # distance euclidienne + d_quat = 1 - abs(sum(t[3:7] * s[3:7])) # compris entre 0 et 1 + d_vel = np.linalg.norm(t[7:10] - s[7:10]) # distance euclidienne + d_rot = np.linalg.norm(t[10:13] - s[10:13]) # distance euclidienne + return d_pos + d_quat*10 + d_vel*0 + d_rot*0 def reward(self): dist = self.distance(self.state, self.target) @@ -57,15 +60,15 @@ class MavionEnv(gym.Env): x = self.state u = action * self.action_range self.state = self.mavion.step(x, u, np.zeros(3), self.tau) - observation = self.observation() + self.obs = self.observation() reward, terminated = self.reward() - return observation, reward, terminated, False, {} + return self.obs, reward, terminated, False, {} def reset(self, seed=None, options=None): super().reset(seed=seed) - self.obs = np.zeros(13) - low, high = utils.maybe_parse_reset_bounds(options, -1, 1) - self.obs[0:3] = self.np_random.uniform(low=low, high=high, size=(3,)) - self.state = self.target + self.obs + low, high = utils.maybe_parse_reset_bounds(options, -1, 1) + delta = self.np_random.uniform(low=low, high=high, size=(3,)) + self.state = self.target + np.concatenate([delta, np.zeros(10)]) + self.obs = self.observation() self.steps_beyond_terminated = None return self.obs, {} diff --git a/mavion/mavion_model.py b/mavion/mavion_model.py index ded1e352c3f6d74d76521c3ce0fbd0023884f8d6..81a472b0e378355ab7db729c9fff259da5d587e6 100644 --- a/mavion/mavion_model.py +++ b/mavion/mavion_model.py @@ -121,7 +121,7 @@ class Mavion: quat = x[3:7] vel = x[7:10] rot = x[10:13] - + [T1, N1] = self.thrust(-u[0]) [T2, N2] = self.thrust(u[1]) tau1 = N1 - (rot[0] - u[0]) * (INERTIA_PROP_X - INERTIA_PROP_N) * np.array([0, rot[2], -rot[1]]) diff --git a/mavion/sac_mavion_1.zip b/mavion/sac_mavion_1.zip index c6f32c49fc5f05c1f750a783e2adb0c6ee250302..8b99de1c21b5e3bfd6a57d15309a1b32651317ee 100644 Binary files a/mavion/sac_mavion_1.zip and b/mavion/sac_mavion_1.zip differ diff --git a/mavion/train.py b/mavion/train.py index 59e6d3a7ca37ee9899df6b9d0aca5fa59e8811b2..b3c16f988a479d614fc1f5e44c1afa0b04431592 100644 --- a/mavion/train.py +++ b/mavion/train.py @@ -1,12 +1,22 @@ import numpy as np from stable_baselines3 import SAC from mavion_env import MavionEnv +from rotation import * if __name__ == "__main__": + pos_target = np.array([0, 0, -20]) + psi_target = 45 + eul_target = np.array([0, 90, 45])*deg2rad + quat_target = eul2quat(eul_target) + env = MavionEnv() - env.target = np.array([0, 0, -20, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]) + env.target = np.concatenate([pos_target, quat_target, np.zeros(6)]) + print(env.target) + obs = env.reset() + print(env.state) + print(obs) model = SAC("MlpPolicy", env, verbose=1) model.learn(total_timesteps=20000, log_interval=10) diff --git a/rotations/quaternion.ipynb b/rotations/quaternion.ipynb index 718fc4e088e4adce66a743397343a8d1e1da47a0..8376d154a25f3ecd54adcc392c8dbd6c42eeeee6 100644 --- a/rotations/quaternion.ipynb +++ b/rotations/quaternion.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 14, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -12,7 +12,16 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "s1 = np.array()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -37,7 +46,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -46,7 +55,7 @@ "text": [ "[0. 1.04719755 0. ]\n", "[0.8660254 0. 0.5 0. ] [0.8660254 0. 0.5 0. ]\n", - "0.19989685480873443 0.6433291804340877 [-0.33141357 0.46193977 -0.19134172]\n" + "0.19989685480873443 0.6433291804340877 [ 0.80010315 -0.33141357 0.46193977 -0.19134172]\n" ] } ], @@ -61,11 +70,12 @@ "q2b = eul2quat(dcm2eul(R2))\n", "print(q2, q2b)\n", "\n", + "\n", "d1 = 1 - abs(sum(q1*q2))\n", "errq = quatmul(quatinv(q1),q2)\n", "d2 = np.arccos(errq[0])\n", "epsq = errq[1:4]\n", - "print(d1, d2, epsq)\n" + "print(d1, d2, errq)\n" ] } ],