improved env with reward et distance

d51909f0 · PASTOR Philippe · eed81bbd · d51909f0 · d51909f0 · d51909f0
Commit d51909f0 authored 6 months ago by PASTOR Philippe 💬
--- a/drone/rotation.py
+++ b/drone/rotation.py
@@ -13,15 +13,14 @@ def inv_hat(m):
 def quat2axang(q):
    a = 2 * np.arccos(q[0])
    n = q[1:4] / np.sqrt(1 - q[0] * q[0])
-    return a, n
+    return a * n
 def dcm2axang(R):
    t = np.trace(R)
    n = 0.5 * inv_hat(R - R.T)
    r = np.linalg.norm(n)
    a = np.arctan2(r, (t - 1) / 2)
-    return a, n / r
+    return a * n / r
 # Quaternions
 def quatmat(p):
@@ -54,8 +53,9 @@ def quatrot(q, u):
    v = quatmul(quatinv(q), quatmul(vect2quat(u), q))
    return v[1:]
+def axang2quat(an):
-def axang2quat(a, n):
+    a = np.linalg.norm(an)
+    n = an / a
    return np.concatenate([[np.cos(a / 2)], n * np.sin(a / 2)])
 def eul2quat(a):
@@ -64,9 +64,10 @@ def eul2quat(a):
    qy = np.array([np.cos(a[2]/2), 0, 0, np.sin(a[2]/2)])
    return quatmul(qy, quatmul(qp, qr))
 # Matrices de Rotation (DCM)
-def axang2dcm(a, n):
+def axang2dcm(an):
+    a = np.linalg.norm(an)
+    n = an / a
    dcm = (1 - np.cos(a)) * np.outer(n, n) + np.cos(a) * np.identity(3) - np.sin(a) * hat(n)
    return dcm

--- a/mavion/__pycache__/mavion_env.cpython-311.pyc
+++ b/mavion/__pycache__/mavion_env.cpython-311.pyc
--- a/mavion/__pycache__/mavion_model.cpython-311.pyc
+++ b/mavion/__pycache__/mavion_model.cpython-311.pyc
--- a/mavion/__pycache__/rotation.cpython-311.pyc
+++ b/mavion/__pycache__/rotation.cpython-311.pyc
--- a/mavion/mavion.yaml
+++ b/mavion/mavion.yaml
@@ -22,5 +22,5 @@ ELEVON_MEFFICIENCY : [0, 0.66, 0]
 ELEVON_FEFFICIENCY : [0, 0.33, 0] 
 PROP_KP : 7.84e-06 
 PROP_KM : 2.400e-7     
-MAX_ROTOR_SPD : 2500  # rad/s
+MAX_ROTOR_SPD : 2500  # rad/s (TBC)
-MAX_FLAP_DEF :  1     # rad
+MAX_FLAP_DEF :  1     # rad (TBC)
--- a/mavion/mavion_env.py
+++ b/mavion/mavion_env.py
@@ -15,22 +15,17 @@ class MavionEnv(gym.Env):
        self.tau = 0.2  # seconds between state updates
        # Angle at which to fail the episode
-        self.pos_threshold = np.array([2.4, 2.4, 2.4])
+        self.pos_threshold = np.array([10, 10, 10])
-        self.ang_threshold = np.array([15, 15, 15]) * deg2rad
        self.quat_threshold = np.array([1, 1, 1, 1])
        self.vel_threshold = np.finfo(np.float32).max * np.ones(3)
        self.rot_threshold = np.finfo(np.float32).max * np.ones(3)
-        high = np.concatenate(
+        high = np.concatenate([
-            [
+                self.pos_threshold,
-                self.pos_threshold * 2,
-                # self.ang_threshold * 2,
                self.quat_threshold,
                self.vel_threshold,
                self.rot_threshold
-            ],
+            ],dtype=np.float32)
-            dtype=np.float32,
-        )
        self.observation_space = gym.spaces.Box(-high, high, dtype=np.float32)
        action_max = np.array([1, 1, 1, 1])
@@ -44,33 +39,33 @@ class MavionEnv(gym.Env):
    def observation(self):
        self.obs = self.state - self.target
+        self.obs[3:7] = quatmul(quatinv(self.state[3:7]), self.target[3:7])
        return self.obs
+    def distance(self, s, t):
+        dpos = np.linalg.norm(t[0:3] - s[0:3]) # distance euclidienne
+        dquat = 1 - abs(sum(t[3:7] * s[3:7]))  # compris entre 0 et 1
+        return dpos + dquat
+    def reward(self):
+        dist = self.distance(self.state, self.target)
+        terminated = dist > 10
+        reward = 10 - dist if not terminated else -10
+        return reward, terminated
+    def step(self, action):
+        x = self.state
+        u = action * self.action_range
+        self.state  = self.mavion.step(x, u, np.zeros(3), self.tau)
+        observation = self.observation()
+        reward, terminated = self.reward()
+        return observation, reward, terminated, False, {}
    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.obs = np.zeros(13)
-        low, high = utils.maybe_parse_reset_bounds(options, -0.1, 0.1) 
+        low, high = utils.maybe_parse_reset_bounds(options, -1, 1) 
        self.obs[0:3] = self.np_random.uniform(low=low, high=high, size=(3,))
        self.state = self.target + self.obs
        self.steps_beyond_terminated = None
        return self.obs, {}
-    def distance(self, s, t):
-        return np.linalg.norm(t[0:3] - s[0:3])
-    def cost(self, s, u, t):
-        u_trim = self.mavion.trim(t)
-        s_trim = self.mavion.trim_state(t)
-        J = (s - s_trim).T@Q@(s - s_trim) + (u - u_trim).T@Q@(u - u_trim)
-        return J
-    def step(self, action):
-        x = self.state
-        u = action * self.action_range
-        w = np.zeros(3)
-        new_state = self.mavion.step(x, u, w, self.tau)
-        self.state = new_state
-        terminated = self.distance(self.state, self.target) > 10
-        reward = 1 if not terminated else -10
-        obs = self.observation()
-        return obs, reward, terminated, False, {}
--- a/mavion/mavion_model.py
+++ b/mavion/mavion_model.py
@@ -127,12 +127,12 @@ class Mavion:
        tau1 = N1 - (rot[0] - u[0]) * (INERTIA_PROP_X - INERTIA_PROP_N) * np.array([0, rot[2], -rot[1]])
        tau2 = N2 - (rot[0] + u[1]) * (INERTIA_PROP_X - INERTIA_PROP_N) * np.array([0, rot[2], -rot[1]])
-        vinf = quatrot(vel - w, quat)
+        vinf = quatrot(quat, vel - w)
        [F1, M1] = self.aero(vinf, rot, T1, u[2])
        [F2, M2] = self.aero(vinf, rot, T2, u[3])
        Fb = T1 + F1 + T2 + F2
-        dvdt = np.array([0, 0, G]) + 1/MASS * quatrot(Fb, quatinv(quat))
+        dvdt = np.array([0, 0, G]) + 1/MASS * quatrot(quatinv(quat), Fb)
        Mb = M1 + tau1 + np.cross(P_A1_CG, F1) + np.cross(P_P1_CG, T1) + M2 + tau2 + np.cross(P_A2_CG, F2) + np.cross(P_P2_CG, T2) 
        drdt = np.linalg.inv(INERTIA) @ (Mb - np.cross(rot, INERTIA@rot))

--- a/mavion/rotation.py
+++ b/mavion/rotation.py
 import numpy as np
 deg2rad = np.pi/180
-rad2deg = 180/np.pi
+rad2deg = 1 / deg2rad
 def hat(x):
    return np.array([[0, -x[2], x[1]], [x[2], 0, -x[0]], [-x[1], x[0], 0]])
@@ -13,15 +13,14 @@ def inv_hat(m):
 def quat2axang(q):
    a = 2 * np.arccos(q[0])
    n = q[1:4] / np.sqrt(1 - q[0] * q[0])
-    return a, n
+    return a * n
 def dcm2axang(R):
    t = np.trace(R)
    n = 0.5 * inv_hat(R - R.T)
    r = np.linalg.norm(n)
    a = np.arctan2(r, (t - 1) / 2)
-    return a, n / r
+    return a * n / r
 # Quaternions
 def quatmat(p):
@@ -50,12 +49,13 @@ def quatinv(q):
 def vect2quat(u):
    return np.concatenate([[0], u])
-def quatrot(u, q):
+def quatrot(q, u):
    v = quatmul(quatinv(q), quatmul(vect2quat(u), q))
    return v[1:]
+def axang2quat(an):
-def axang2quat(a, n):
+    a = np.linalg.norm(an)
+    n = an / a
    return np.concatenate([[np.cos(a / 2)], n * np.sin(a / 2)])
 def eul2quat(a):
@@ -64,9 +64,10 @@ def eul2quat(a):
    qy = np.array([np.cos(a[2]/2), 0, 0, np.sin(a[2]/2)])
    return quatmul(qy, quatmul(qp, qr))
 # Matrices de Rotation (DCM)
-def axang2dcm(a, n):
+def axang2dcm(an):
+    a = np.linalg.norm(an)
+    n = an / a
    dcm = (1 - np.cos(a)) * np.outer(n, n) + np.cos(a) * np.identity(3) - np.sin(a) * hat(n)
    return dcm

--- a/mavion/train.py
+++ b/mavion/train.py
@@ -11,4 +11,3 @@ if __name__ == "__main__":
    model = SAC("MlpPolicy", env, verbose=1)
    model.learn(total_timesteps=20000, log_interval=10)
    model.save("sac_mavion_1")
--- a/rotations/__pycache__/rotation.cpython-311.pyc
+++ b/rotations/__pycache__/rotation.cpython-311.pyc
--- a/rotations/main.py
+++ b/rotations/main.py
+import numpy as np
+from rotation import *
+if __name__ == "__main__":
+    a1 = np.pi/4
+    n1 = np.array([1, 0, 0])
+    rot1 = a1 * n1
+    print(rot1)
+    q1 = axang2quat(rot1)
+    print(q1)
+    teta2 = np.pi/3
--- a/rotations/quaternion.ipynb
+++ b/rotations/quaternion.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "from rotation import *"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.78539816 0.         0.        ]\n",
+      "[0.92387953 0.38268343 0.         0.        ] [0.92387953 0.38268343 0.         0.        ]\n"
+     ]
+    }
+   ],
+   "source": [
+    "a1 = np.pi/4\n",
+    "n1 = np.array([1, 0, 0])\n",
+    "rot1 = a1 * n1\n",
+    "print(rot1)\n",
+    "q1 = axang2quat(rot1)\n",
+    "R1 = axang2dcm(rot1)\n",
+    "q1b = eul2quat(dcm2eul(R1))\n",
+    "print(q1, q1b)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[0.         1.04719755 0.        ]\n",
+      "[0.8660254 0.        0.5       0.       ] [0.8660254 0.        0.5       0.       ]\n",
+      "0.19989685480873443 0.6433291804340877 [-0.33141357  0.46193977 -0.19134172]\n"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "a2 = np.pi/3\n",
+    "n2 = np.array([0, 1, 0])\n",
+    "rot2 = a2 * n2\n",
+    "print(rot2)\n",
+    "q2 = axang2quat(rot2)\n",
+    "R2 = axang2dcm(rot2)\n",
+    "q2b = eul2quat(dcm2eul(R2))\n",
+    "print(q2, q2b)\n",
+    "\n",
+    "d1 = 1 - abs(sum(q1*q2))\n",
+    "errq = quatmul(quatinv(q1),q2)\n",
+    "d2 = np.arccos(errq[0])\n",
+    "epsq = errq[1:4]\n",
+    "print(d1, d2, epsq)\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "base",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
+%% Cell type:code id: tags:
+``` python
+import numpy as np
+from rotation import *
+```
+%% Cell type:code id: tags:
+``` python
+a1 = np.pi/4
+n1 = np.array([1, 0, 0])
+rot1 = a1 * n1
+print(rot1)
+q1 = axang2quat(rot1)
+R1 = axang2dcm(rot1)
+q1b = eul2quat(dcm2eul(R1))
+print(q1, q1b)
+```
+%% Output
+    [0.78539816 0.         0.        ]
+    [0.92387953 0.38268343 0.         0.        ] [0.92387953 0.38268343 0.         0.        ]
+%% Cell type:code id: tags:
+``` python
+a2 = np.pi/3
+n2 = np.array([0, 1, 0])
+rot2 = a2 * n2
+print(rot2)
+q2 = axang2quat(rot2)
+R2 = axang2dcm(rot2)
+q2b = eul2quat(dcm2eul(R2))
+print(q2, q2b)
+d1 = 1 - abs(sum(q1*q2))
+errq = quatmul(quatinv(q1),q2)
+d2 = np.arccos(errq[0])
+epsq = errq[1:4]
+print(d1, d2, epsq)
+```
+%% Output
+    [0.         1.04719755 0.        ]
+    [0.8660254 0.        0.5       0.       ] [0.8660254 0.        0.5       0.       ]
+    0.19989685480873443 0.6433291804340877 [-0.33141357  0.46193977 -0.19134172]
--- a/rotations/rotation.py
+++ b/rotations/rotation.py
+import numpy as np
+deg2rad = np.pi/180
+rad2deg = 1 / deg2rad
+def hat(x):
+    return np.array([[0, -x[2], x[1]], [x[2], 0, -x[0]], [-x[1], x[0], 0]])
+def inv_hat(m):
+    return np.array([m[2, 1], m[0, 2], m[1, 0]])
+# (a, n): a = angle, n = vecteur unitaire (axe de rotation)
+def quat2axang(q):
+    a = 2 * np.arccos(q[0])
+    n = q[1:4] / np.sqrt(1 - q[0] * q[0])
+    return a * n
+def dcm2axang(R):
+    t = np.trace(R)
+    n = 0.5 * inv_hat(R - R.T)
+    r = np.linalg.norm(n)
+    a = np.arctan2(r, (t - 1) / 2)
+    return a * n / r
+# Quaternions
+def quatmat(p):
+    m = np.array(
+        [
+            [p[0], -p[1], -p[2], -p[3]],
+            [p[1],  p[0], -p[3],  p[2]], 
+            [p[2],  p[3],  p[0], -p[1]], 
+            [p[3], -p[2],  p[1],  p[0]]
+        ]
+    )
+    return m
+def quatmul(p, q):
+    return quatmat(p)@q
+def quatnorm(q): 
+    return np.dot(q, q)
+def quatconj(q):
+    return q * np.array([1, -1, -1, -1])
+def quatinv(q):
+    return quatconj(q) / quatnorm(q)
+def vect2quat(u):
+    return np.concatenate([[0], u])
+def quatrot(q, u):
+    v = quatmul(quatinv(q), quatmul(vect2quat(u), q))
+    return v[1:]
+def axang2quat(an):
+    a = np.linalg.norm(an)
+    n = an / a
+    return np.concatenate([[np.cos(a / 2)], n * np.sin(a / 2)])
+def eul2quat(a):
+    qr = np.array([np.cos(a[0]/2), np.sin(a[0]/2), 0, 0])
+    qp = np.array([np.cos(a[1]/2), 0, np.sin(a[1]/2), 0])
+    qy = np.array([np.cos(a[2]/2), 0, 0, np.sin(a[2]/2)])
+    return quatmul(qy, quatmul(qp, qr))
+# Matrices de Rotation (DCM)
+def axang2dcm(an):
+    a = np.linalg.norm(an)
+    n = an / a
+    dcm = (1 - np.cos(a)) * np.outer(n, n) + np.cos(a) * np.identity(3) - np.sin(a) * hat(n)
+    return dcm
+def quat2dcm(q):
+    w = q[1:4]
+    dcm = (2 * np.outer(w, w) + (q[0]**2 - w @ w) * np.identity(3) - 2 * q[0] * hat(w)) / quatnorm(q)
+    return dcm
+def eul2dcm(a):
+    Rx = np.array([[1, 0, 0], [0, np.cos(a[0]), np.sin(a[0])], [0, -np.sin(a[0]), np.cos(a[0])]])
+    Ry = np.array([[np.cos(a[1]), 0, -np.sin(a[1])], [0, 1, 0], [np.sin(a[1]), 0, np.cos(a[1])]])
+    Rz = np.array([[np.cos(a[2]), np.sin(a[2]), 0], [-np.sin(a[2]), np.cos(a[2]), 0], [0, 0, 1]])               
+    return Rx @ Ry @ Rz
+# Angles d'Euler (phi, theta, psi) selon axes (x, y, z) : z en premier
+def quat2eul(q):
+    phi = np.arctan2(2 * (q[0] * q[1] + q[2] * q[3]), q[0]**2 - q[1]**2 - q[2]**2 + q[3]**2)
+    theta = -np.arcsin(2 * (q[1] * q[3] - q[0] * q[2]))
+    psi = np.arctan2(2 * (q[0] * q[3] + q[1] * q[2]), q[0]**2 + q[1]**2 - q[2]**2 - q[3]**2)
+    return np.array([phi, theta, psi])
+def dcm2eul(R):
+    phi = np.arctan2(R[1, 2], R[2, 2])
+    theta = -np.arcsin(R[0, 2])
+    psi = np.arctan2(R[0, 1], R[0, 0])
+    return np.array([phi, theta, psi])