Skip to content
Snippets Groups Projects
Commit d51909f0 authored by PASTOR Philippe's avatar PASTOR Philippe 💬
Browse files

improved env with reward et distance

parent eed81bbd
No related branches found
No related tags found
1 merge request!2Phil
...@@ -13,15 +13,14 @@ def inv_hat(m): ...@@ -13,15 +13,14 @@ def inv_hat(m):
def quat2axang(q): def quat2axang(q):
a = 2 * np.arccos(q[0]) a = 2 * np.arccos(q[0])
n = q[1:4] / np.sqrt(1 - q[0] * q[0]) n = q[1:4] / np.sqrt(1 - q[0] * q[0])
return a, n return a * n
def dcm2axang(R): def dcm2axang(R):
t = np.trace(R) t = np.trace(R)
n = 0.5 * inv_hat(R - R.T) n = 0.5 * inv_hat(R - R.T)
r = np.linalg.norm(n) r = np.linalg.norm(n)
a = np.arctan2(r, (t - 1) / 2) a = np.arctan2(r, (t - 1) / 2)
return a, n / r return a * n / r
# Quaternions # Quaternions
def quatmat(p): def quatmat(p):
...@@ -54,8 +53,9 @@ def quatrot(q, u): ...@@ -54,8 +53,9 @@ def quatrot(q, u):
v = quatmul(quatinv(q), quatmul(vect2quat(u), q)) v = quatmul(quatinv(q), quatmul(vect2quat(u), q))
return v[1:] return v[1:]
def axang2quat(an):
def axang2quat(a, n): a = np.linalg.norm(an)
n = an / a
return np.concatenate([[np.cos(a / 2)], n * np.sin(a / 2)]) return np.concatenate([[np.cos(a / 2)], n * np.sin(a / 2)])
def eul2quat(a): def eul2quat(a):
...@@ -64,9 +64,10 @@ def eul2quat(a): ...@@ -64,9 +64,10 @@ def eul2quat(a):
qy = np.array([np.cos(a[2]/2), 0, 0, np.sin(a[2]/2)]) qy = np.array([np.cos(a[2]/2), 0, 0, np.sin(a[2]/2)])
return quatmul(qy, quatmul(qp, qr)) return quatmul(qy, quatmul(qp, qr))
# Matrices de Rotation (DCM) # Matrices de Rotation (DCM)
def axang2dcm(a, n): def axang2dcm(an):
a = np.linalg.norm(an)
n = an / a
dcm = (1 - np.cos(a)) * np.outer(n, n) + np.cos(a) * np.identity(3) - np.sin(a) * hat(n) dcm = (1 - np.cos(a)) * np.outer(n, n) + np.cos(a) * np.identity(3) - np.sin(a) * hat(n)
return dcm return dcm
......
No preview for this file type
No preview for this file type
No preview for this file type
...@@ -22,5 +22,5 @@ ELEVON_MEFFICIENCY : [0, 0.66, 0] ...@@ -22,5 +22,5 @@ ELEVON_MEFFICIENCY : [0, 0.66, 0]
ELEVON_FEFFICIENCY : [0, 0.33, 0] ELEVON_FEFFICIENCY : [0, 0.33, 0]
PROP_KP : 7.84e-06 PROP_KP : 7.84e-06
PROP_KM : 2.400e-7 PROP_KM : 2.400e-7
MAX_ROTOR_SPD : 2500 # rad/s MAX_ROTOR_SPD : 2500 # rad/s (TBC)
MAX_FLAP_DEF : 1 # rad MAX_FLAP_DEF : 1 # rad (TBC)
...@@ -15,22 +15,17 @@ class MavionEnv(gym.Env): ...@@ -15,22 +15,17 @@ class MavionEnv(gym.Env):
self.tau = 0.2 # seconds between state updates self.tau = 0.2 # seconds between state updates
# Angle at which to fail the episode # Angle at which to fail the episode
self.pos_threshold = np.array([2.4, 2.4, 2.4]) self.pos_threshold = np.array([10, 10, 10])
self.ang_threshold = np.array([15, 15, 15]) * deg2rad
self.quat_threshold = np.array([1, 1, 1, 1]) self.quat_threshold = np.array([1, 1, 1, 1])
self.vel_threshold = np.finfo(np.float32).max * np.ones(3) self.vel_threshold = np.finfo(np.float32).max * np.ones(3)
self.rot_threshold = np.finfo(np.float32).max * np.ones(3) self.rot_threshold = np.finfo(np.float32).max * np.ones(3)
high = np.concatenate( high = np.concatenate([
[ self.pos_threshold,
self.pos_threshold * 2,
# self.ang_threshold * 2,
self.quat_threshold, self.quat_threshold,
self.vel_threshold, self.vel_threshold,
self.rot_threshold self.rot_threshold
], ],dtype=np.float32)
dtype=np.float32,
)
self.observation_space = gym.spaces.Box(-high, high, dtype=np.float32) self.observation_space = gym.spaces.Box(-high, high, dtype=np.float32)
action_max = np.array([1, 1, 1, 1]) action_max = np.array([1, 1, 1, 1])
...@@ -44,33 +39,33 @@ class MavionEnv(gym.Env): ...@@ -44,33 +39,33 @@ class MavionEnv(gym.Env):
def observation(self): def observation(self):
self.obs = self.state - self.target self.obs = self.state - self.target
self.obs[3:7] = quatmul(quatinv(self.state[3:7]), self.target[3:7])
return self.obs return self.obs
def distance(self, s, t):
dpos = np.linalg.norm(t[0:3] - s[0:3]) # distance euclidienne
dquat = 1 - abs(sum(t[3:7] * s[3:7])) # compris entre 0 et 1
return dpos + dquat
def reward(self):
dist = self.distance(self.state, self.target)
terminated = dist > 10
reward = 10 - dist if not terminated else -10
return reward, terminated
def step(self, action):
x = self.state
u = action * self.action_range
self.state = self.mavion.step(x, u, np.zeros(3), self.tau)
observation = self.observation()
reward, terminated = self.reward()
return observation, reward, terminated, False, {}
def reset(self, seed=None, options=None): def reset(self, seed=None, options=None):
super().reset(seed=seed) super().reset(seed=seed)
self.obs = np.zeros(13) self.obs = np.zeros(13)
low, high = utils.maybe_parse_reset_bounds(options, -0.1, 0.1) low, high = utils.maybe_parse_reset_bounds(options, -1, 1)
self.obs[0:3] = self.np_random.uniform(low=low, high=high, size=(3,)) self.obs[0:3] = self.np_random.uniform(low=low, high=high, size=(3,))
self.state = self.target + self.obs self.state = self.target + self.obs
self.steps_beyond_terminated = None self.steps_beyond_terminated = None
return self.obs, {} return self.obs, {}
def distance(self, s, t):
return np.linalg.norm(t[0:3] - s[0:3])
def cost(self, s, u, t):
u_trim = self.mavion.trim(t)
s_trim = self.mavion.trim_state(t)
J = (s - s_trim).T@Q@(s - s_trim) + (u - u_trim).T@Q@(u - u_trim)
return J
def step(self, action):
x = self.state
u = action * self.action_range
w = np.zeros(3)
new_state = self.mavion.step(x, u, w, self.tau)
self.state = new_state
terminated = self.distance(self.state, self.target) > 10
reward = 1 if not terminated else -10
obs = self.observation()
return obs, reward, terminated, False, {}
...@@ -127,12 +127,12 @@ class Mavion: ...@@ -127,12 +127,12 @@ class Mavion:
tau1 = N1 - (rot[0] - u[0]) * (INERTIA_PROP_X - INERTIA_PROP_N) * np.array([0, rot[2], -rot[1]]) tau1 = N1 - (rot[0] - u[0]) * (INERTIA_PROP_X - INERTIA_PROP_N) * np.array([0, rot[2], -rot[1]])
tau2 = N2 - (rot[0] + u[1]) * (INERTIA_PROP_X - INERTIA_PROP_N) * np.array([0, rot[2], -rot[1]]) tau2 = N2 - (rot[0] + u[1]) * (INERTIA_PROP_X - INERTIA_PROP_N) * np.array([0, rot[2], -rot[1]])
vinf = quatrot(vel - w, quat) vinf = quatrot(quat, vel - w)
[F1, M1] = self.aero(vinf, rot, T1, u[2]) [F1, M1] = self.aero(vinf, rot, T1, u[2])
[F2, M2] = self.aero(vinf, rot, T2, u[3]) [F2, M2] = self.aero(vinf, rot, T2, u[3])
Fb = T1 + F1 + T2 + F2 Fb = T1 + F1 + T2 + F2
dvdt = np.array([0, 0, G]) + 1/MASS * quatrot(Fb, quatinv(quat)) dvdt = np.array([0, 0, G]) + 1/MASS * quatrot(quatinv(quat), Fb)
Mb = M1 + tau1 + np.cross(P_A1_CG, F1) + np.cross(P_P1_CG, T1) + M2 + tau2 + np.cross(P_A2_CG, F2) + np.cross(P_P2_CG, T2) Mb = M1 + tau1 + np.cross(P_A1_CG, F1) + np.cross(P_P1_CG, T1) + M2 + tau2 + np.cross(P_A2_CG, F2) + np.cross(P_P2_CG, T2)
drdt = np.linalg.inv(INERTIA) @ (Mb - np.cross(rot, INERTIA@rot)) drdt = np.linalg.inv(INERTIA) @ (Mb - np.cross(rot, INERTIA@rot))
......
import numpy as np import numpy as np
deg2rad = np.pi/180 deg2rad = np.pi/180
rad2deg = 180/np.pi rad2deg = 1 / deg2rad
def hat(x): def hat(x):
return np.array([[0, -x[2], x[1]], [x[2], 0, -x[0]], [-x[1], x[0], 0]]) return np.array([[0, -x[2], x[1]], [x[2], 0, -x[0]], [-x[1], x[0], 0]])
...@@ -13,15 +13,14 @@ def inv_hat(m): ...@@ -13,15 +13,14 @@ def inv_hat(m):
def quat2axang(q): def quat2axang(q):
a = 2 * np.arccos(q[0]) a = 2 * np.arccos(q[0])
n = q[1:4] / np.sqrt(1 - q[0] * q[0]) n = q[1:4] / np.sqrt(1 - q[0] * q[0])
return a, n return a * n
def dcm2axang(R): def dcm2axang(R):
t = np.trace(R) t = np.trace(R)
n = 0.5 * inv_hat(R - R.T) n = 0.5 * inv_hat(R - R.T)
r = np.linalg.norm(n) r = np.linalg.norm(n)
a = np.arctan2(r, (t - 1) / 2) a = np.arctan2(r, (t - 1) / 2)
return a, n / r return a * n / r
# Quaternions # Quaternions
def quatmat(p): def quatmat(p):
...@@ -50,12 +49,13 @@ def quatinv(q): ...@@ -50,12 +49,13 @@ def quatinv(q):
def vect2quat(u): def vect2quat(u):
return np.concatenate([[0], u]) return np.concatenate([[0], u])
def quatrot(u, q): def quatrot(q, u):
v = quatmul(quatinv(q), quatmul(vect2quat(u), q)) v = quatmul(quatinv(q), quatmul(vect2quat(u), q))
return v[1:] return v[1:]
def axang2quat(an):
def axang2quat(a, n): a = np.linalg.norm(an)
n = an / a
return np.concatenate([[np.cos(a / 2)], n * np.sin(a / 2)]) return np.concatenate([[np.cos(a / 2)], n * np.sin(a / 2)])
def eul2quat(a): def eul2quat(a):
...@@ -64,9 +64,10 @@ def eul2quat(a): ...@@ -64,9 +64,10 @@ def eul2quat(a):
qy = np.array([np.cos(a[2]/2), 0, 0, np.sin(a[2]/2)]) qy = np.array([np.cos(a[2]/2), 0, 0, np.sin(a[2]/2)])
return quatmul(qy, quatmul(qp, qr)) return quatmul(qy, quatmul(qp, qr))
# Matrices de Rotation (DCM) # Matrices de Rotation (DCM)
def axang2dcm(a, n): def axang2dcm(an):
a = np.linalg.norm(an)
n = an / a
dcm = (1 - np.cos(a)) * np.outer(n, n) + np.cos(a) * np.identity(3) - np.sin(a) * hat(n) dcm = (1 - np.cos(a)) * np.outer(n, n) + np.cos(a) * np.identity(3) - np.sin(a) * hat(n)
return dcm return dcm
......
...@@ -11,4 +11,3 @@ if __name__ == "__main__": ...@@ -11,4 +11,3 @@ if __name__ == "__main__":
model = SAC("MlpPolicy", env, verbose=1) model = SAC("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=20000, log_interval=10) model.learn(total_timesteps=20000, log_interval=10)
model.save("sac_mavion_1") model.save("sac_mavion_1")
File added
import numpy as np
from rotation import *
if __name__ == "__main__":
a1 = np.pi/4
n1 = np.array([1, 0, 0])
rot1 = a1 * n1
print(rot1)
q1 = axang2quat(rot1)
print(q1)
teta2 = np.pi/3
%% Cell type:code id: tags:
``` python
import numpy as np
from rotation import *
```
%% Cell type:code id: tags:
``` python
a1 = np.pi/4
n1 = np.array([1, 0, 0])
rot1 = a1 * n1
print(rot1)
q1 = axang2quat(rot1)
R1 = axang2dcm(rot1)
q1b = eul2quat(dcm2eul(R1))
print(q1, q1b)
```
%% Output
[0.78539816 0. 0. ]
[0.92387953 0.38268343 0. 0. ] [0.92387953 0.38268343 0. 0. ]
%% Cell type:code id: tags:
``` python
a2 = np.pi/3
n2 = np.array([0, 1, 0])
rot2 = a2 * n2
print(rot2)
q2 = axang2quat(rot2)
R2 = axang2dcm(rot2)
q2b = eul2quat(dcm2eul(R2))
print(q2, q2b)
d1 = 1 - abs(sum(q1*q2))
errq = quatmul(quatinv(q1),q2)
d2 = np.arccos(errq[0])
epsq = errq[1:4]
print(d1, d2, epsq)
```
%% Output
[0. 1.04719755 0. ]
[0.8660254 0. 0.5 0. ] [0.8660254 0. 0.5 0. ]
0.19989685480873443 0.6433291804340877 [-0.33141357 0.46193977 -0.19134172]
import numpy as np
deg2rad = np.pi/180
rad2deg = 1 / deg2rad
def hat(x):
return np.array([[0, -x[2], x[1]], [x[2], 0, -x[0]], [-x[1], x[0], 0]])
def inv_hat(m):
return np.array([m[2, 1], m[0, 2], m[1, 0]])
# (a, n): a = angle, n = vecteur unitaire (axe de rotation)
def quat2axang(q):
a = 2 * np.arccos(q[0])
n = q[1:4] / np.sqrt(1 - q[0] * q[0])
return a * n
def dcm2axang(R):
t = np.trace(R)
n = 0.5 * inv_hat(R - R.T)
r = np.linalg.norm(n)
a = np.arctan2(r, (t - 1) / 2)
return a * n / r
# Quaternions
def quatmat(p):
m = np.array(
[
[p[0], -p[1], -p[2], -p[3]],
[p[1], p[0], -p[3], p[2]],
[p[2], p[3], p[0], -p[1]],
[p[3], -p[2], p[1], p[0]]
]
)
return m
def quatmul(p, q):
return quatmat(p)@q
def quatnorm(q):
return np.dot(q, q)
def quatconj(q):
return q * np.array([1, -1, -1, -1])
def quatinv(q):
return quatconj(q) / quatnorm(q)
def vect2quat(u):
return np.concatenate([[0], u])
def quatrot(q, u):
v = quatmul(quatinv(q), quatmul(vect2quat(u), q))
return v[1:]
def axang2quat(an):
a = np.linalg.norm(an)
n = an / a
return np.concatenate([[np.cos(a / 2)], n * np.sin(a / 2)])
def eul2quat(a):
qr = np.array([np.cos(a[0]/2), np.sin(a[0]/2), 0, 0])
qp = np.array([np.cos(a[1]/2), 0, np.sin(a[1]/2), 0])
qy = np.array([np.cos(a[2]/2), 0, 0, np.sin(a[2]/2)])
return quatmul(qy, quatmul(qp, qr))
# Matrices de Rotation (DCM)
def axang2dcm(an):
a = np.linalg.norm(an)
n = an / a
dcm = (1 - np.cos(a)) * np.outer(n, n) + np.cos(a) * np.identity(3) - np.sin(a) * hat(n)
return dcm
def quat2dcm(q):
w = q[1:4]
dcm = (2 * np.outer(w, w) + (q[0]**2 - w @ w) * np.identity(3) - 2 * q[0] * hat(w)) / quatnorm(q)
return dcm
def eul2dcm(a):
Rx = np.array([[1, 0, 0], [0, np.cos(a[0]), np.sin(a[0])], [0, -np.sin(a[0]), np.cos(a[0])]])
Ry = np.array([[np.cos(a[1]), 0, -np.sin(a[1])], [0, 1, 0], [np.sin(a[1]), 0, np.cos(a[1])]])
Rz = np.array([[np.cos(a[2]), np.sin(a[2]), 0], [-np.sin(a[2]), np.cos(a[2]), 0], [0, 0, 1]])
return Rx @ Ry @ Rz
# Angles d'Euler (phi, theta, psi) selon axes (x, y, z) : z en premier
def quat2eul(q):
phi = np.arctan2(2 * (q[0] * q[1] + q[2] * q[3]), q[0]**2 - q[1]**2 - q[2]**2 + q[3]**2)
theta = -np.arcsin(2 * (q[1] * q[3] - q[0] * q[2]))
psi = np.arctan2(2 * (q[0] * q[3] + q[1] * q[2]), q[0]**2 + q[1]**2 - q[2]**2 - q[3]**2)
return np.array([phi, theta, psi])
def dcm2eul(R):
phi = np.arctan2(R[1, 2], R[2, 2])
theta = -np.arcsin(R[0, 2])
psi = np.arctan2(R[0, 1], R[0, 0])
return np.array([phi, theta, psi])
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment