diff --git a/drone/__pycache__/drone_env.cpython-311.pyc b/drone/__pycache__/drone_env.cpython-311.pyc index 9ad793b14e6ee6e49c10bfb224e26dade36ddac3..0271c1870278ae0f27a92ccd283dc1f7d3faff37 100644 Binary files a/drone/__pycache__/drone_env.cpython-311.pyc and b/drone/__pycache__/drone_env.cpython-311.pyc differ diff --git a/mavion/__pycache__/mavion_env.cpython-311.pyc b/mavion/__pycache__/mavion_env.cpython-311.pyc index 2a9bdc5489891a76c2de6e78b6aa807c5218e45b..435e0f3aba480aad7c3a3c015776c7db12a0a6bc 100644 Binary files a/mavion/__pycache__/mavion_env.cpython-311.pyc and b/mavion/__pycache__/mavion_env.cpython-311.pyc differ diff --git a/mavion/__pycache__/mavion_model.cpython-311.pyc b/mavion/__pycache__/mavion_model.cpython-311.pyc index 24e16ea710566fb8fa11f9046b2484f6a0219c2b..f441af6fb0fcd84230ee54145cbb9c44dc100361 100644 Binary files a/mavion/__pycache__/mavion_model.cpython-311.pyc and b/mavion/__pycache__/mavion_model.cpython-311.pyc differ diff --git a/mavion/main.py b/mavion/main.py index 6e2960906c94b9351529f7e4ea51faa270cd3fc6..89fb6b538c99c9db154e2660e76c09039703cf10 100644 --- a/mavion/main.py +++ b/mavion/main.py @@ -19,12 +19,14 @@ def uramp(t, t0) : if __name__ == "__main__": mavion_env = MavionEnv() - vh = 0 + vh = 10 vz = 0 cap = 0 omega = 0 - dx, de, theta = mavion_env.mavion.trim(vh, vz) + target = [vh, vz] + + dx, de, theta = mavion_env.mavion.trim(target) theta = (theta + np.pi) % (2*np.pi) - np.pi print(dx, de, theta*57.3) diff --git a/mavion/mavion.yaml b/mavion/mavion.yaml index 63ef25c774aa46d725b1d894e07d057ea883ecab..9782da1a57e6a467e9f5ab979eb5961f2426b832 100644 --- a/mavion/mavion.yaml +++ b/mavion/mavion.yaml @@ -22,3 +22,5 @@ ELEVON_MEFFICIENCY : [0, 0.66, 0] ELEVON_FEFFICIENCY : [0, 0.33, 0] PROP_KP : 7.84e-06 PROP_KM : 2.400e-7 +MAX_ROTOR_SPD : 2500 # rad/s +MAX_FLAP_DEF : 1 # rad diff --git a/mavion/mavion_env.py b/mavion/mavion_env.py index e7ad537ed17b72f382da1c2bb24423bb2671cccd..1b2b72ed02ff5dc918feead797e83abf7abc24dd 100644 --- a/mavion/mavion_env.py +++ b/mavion/mavion_env.py @@ -4,11 +4,14 @@ from gymnasium.envs.classic_control import utils from mavion_model import Mavion from rotation import * + +deg2rad = np.pi / 180 + class MavionEnv(gym.Env): def __init__(self, mavion=Mavion(), render_mode=None): - self.tau = 0.02 # seconds between state updates + self.tau = 0.2 # seconds between state updates # Angle at which to fail the episode self.pos_threshold = np.array([2.4, 2.4, 2.4]) @@ -28,29 +31,40 @@ class MavionEnv(gym.Env): dtype=np.float32, ) self.observation_space = gym.spaces.Box(-high, high, dtype=np.float32) - self.action_space = gym.spaces.Box(0, 1, shape=(4,), dtype=np.float32) - self.target = np.zeros(13) + self.action_space = gym.spaces.Box(-1, 1, shape=(4,), dtype=np.float32) + self.action_range = np.array([mavion.MAX_ROTOR_SPD, mavion.MAX_ROTOR_SPD, mavion.MAX_FLAP_DEF, mavion.MAX_FLAP_DEF]) + self.state = np.zeros(13) - self.obs = np.zeros(13) + self.target = np.array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]) + self.obs = self.state - self.target self.steps_beyond_terminated = None self.mavion = mavion def reset(self, seed=None, options=None): super().reset(seed=seed) - low, high = utils.maybe_parse_reset_bounds(options, -0.05, 0.05) - self.target = np.zeros(13) - self.obs = self.np_random.uniform(low=low, high=high, size=(13,)) - self.state = self.obs + self.target + self.obs = np.zeros(13) + low, high = utils.maybe_parse_reset_bounds(options, -0.1, 0.1) + self.obs[0:3] = self.np_random.uniform(low=low, high=high, size=(3,)) + self.state = self.target + self.obs self.steps_beyond_terminated = None return self.obs, {} + def distance(self, s, t): + return np.linalg.norm(t[0:3] - s[0:3]) + + def cost(self, s, u, t): + u_trim = self.mavion.trim(t) + s_trim = self.mavion.trim_state(t) + J = (s - s_trim).T@Q@(s - s_trim) + (u - u_trim).T@Q@(u - u_trim) + return J + def step(self, action): x = self.state - u = action + u = action * self.action_range w = np.zeros(3) new_state = self.mavion.step(x, u, w, self.tau) self.state = new_state - terminated = np.array_equal(self.state, self.target) - reward = 1 if terminated else 0 # Binary sparse rewards + terminated = self.distance(self.state, self.target) > 10 + reward = 1 if not terminated else -10 self.obs = self.state - self.target return self.obs, reward, terminated, False, {} diff --git a/mavion/mavion_model.py b/mavion/mavion_model.py index 4bc758abf25e538435fbf9c365228baf93c1325b..fe31db5e871ff63134208556c2a5c215ca13b067 100644 --- a/mavion/mavion_model.py +++ b/mavion/mavion_model.py @@ -28,23 +28,26 @@ class Mavion: self.ELEVON_MEFFICIENCY = np.array(drone_data['ELEVON_MEFFICIENCY']) self.ELEVON_FEFFICIENCY = np.array(drone_data['ELEVON_FEFFICIENCY']) - self.PHI_n = drone_data["PHI_n"] + self.MAX_ROTOR_SPD = drone_data['MAX_ROTOR_SPD'] + self.MAX_FLAP_DEF = drone_data['MAX_FLAP_DEF'] # geometric parameters - self.P_P1_CG = np.array(drone_data["P_P1_CG"]) * self.CHORD - self.P_P2_CG = np.array(drone_data["P_P2_CG"]) * self.CHORD - self.P_A1_CG = np.array(drone_data["P_A1_CG"]) * self.CHORD - self.P_A2_CG = np.array(drone_data["P_A2_CG"]) * self.CHORD + self.P_P1_CG = np.array(drone_data["P_P1_CG"]) * np.array([self.CHORD, self.WINGSPAN, 0]) + self.P_P2_CG = np.array(drone_data["P_P2_CG"]) * np.array([self.CHORD, self.WINGSPAN, 0]) + self.P_A1_CG = np.array(drone_data["P_A1_CG"]) * np.array([self.CHORD, self.WINGSPAN, 0]) + self.P_A2_CG = np.array(drone_data["P_A2_CG"]) * np.array([self.CHORD, self.WINGSPAN, 0]) + self.PHI_n = drone_data["PHI_n"] self.Cd0 = 0.1 self.Cy0 = 0.1 self.dR = -0.1 * self.CHORD self.PHI_fv = np.diag([self.Cd0, self.Cy0, (2 * np.pi + self.Cd0)]) self.PHI_mv = np.array([[0, 0, 0], - [0, 0, - self.dR * (2 * np.pi + self.Cd0) / self.CHORD], + [0, 0, -self.dR * (2 * np.pi + self.Cd0) / self.CHORD], [0, self.dR * self.Cy0 / self.WINGSPAN, 0]]) - self.PHI_mw = 1 / 2 * np.diag([0.5, 0.5, 0.5]) - self.PHI = np.block([[self.PHI_fv, self.PHI_mv.T], [self.PHI_mv, self.PHI_mw]]) + self.PHI_fw = self.PHI_mv.T + self.PHI_mw = 1/2 * np.diag([0.5, 0.5, 0.5]) + def thrust(self, dx): kp = self.PROP_KP @@ -55,9 +58,10 @@ class Mavion: N = np.sign(dx) * km * dx**2 * np.array([1, 0, 0]) return np.array([T, N]) - def aero(self, quat, vel, rot, T, de, w): + def aero(self, vinf, rot, T, de): # data extraction from self struct PHI_fv = self.PHI_fv + PHI_fw = self.PHI_fw PHI_mv = self.PHI_mv PHI_mw = self.PHI_mw PHI_n = self.PHI_n @@ -77,26 +81,22 @@ class Mavion: # computation of chord matrix B = np.array([ws, chord, ws]) - # freestream velocity computation in body frame - vinf = quatrot(vel - w, quat) # eta computation eta = np.sqrt(vinf@vinf + PHI_n * (B*rot)@(B*rot)) qv = 1/2*RHO*S*eta # Force computation # airfoil contribution - Fb_a = -qv * (PHI_fv@vinf + PHI_mv@(B*rot)) - 1/2*Swet/Sp * PHI_fv@T + Fb_a = -qv * (PHI_fv@vinf + PHI_fw@(B*rot)) - 1/2 * Swet/Sp * PHI_fv@T # elevon contribution - Fb_e = qv * (PHI_fv@np.cross(de*Thetaf, vinf) + PHI_mv@(B*np.cross(de*Thetaf, rot))) \ - + 1/2*Swet/Sp * PHI_fv@np.cross(de*Thetaf, T) + Fb_e = qv * (PHI_fv@np.cross(Thetaf*de, vinf) + PHI_fw@(np.cross(Thetaf*de, B*rot))) + 1/2 * Swet/Sp * PHI_fv@np.cross(Thetaf*de, T) Fb = Fb_a + Fb_e # Moment computation # airfoil contribution - Mb_a = -qv * (B*(PHI_mv@vinf) + B*(PHI_mw@(B*rot))) - 1/2*Swet/Sp * B*(PHI_mv@T) + Mb_a = -qv * B*(PHI_mv@vinf + PHI_mw@(B*rot)) - 1/2 * Swet/Sp * B*(PHI_mv@T) # elevon contribution - Mb_e = qv * (B*(PHI_mv@np.cross(de*Thetam, vinf)) + B*(PHI_mw@(B*np.cross(de*Thetam, rot)))) \ - + 1/2*Swet/Sp * B*(PHI_mv@np.cross(de*Thetam, T)) + Mb_e = qv * B*(PHI_mv@np.cross(Thetam*de, vinf) + PHI_mw@(np.cross(Thetam*de, B*rot))) + 1/2 * Swet/Sp * B*(PHI_mv@np.cross(Thetam*de, T)) Mb = Mb_a + Mb_e return np.array([Fb, Mb]) @@ -124,29 +124,26 @@ class Mavion: [T1, N1] = self.thrust(u[0]) [T2, N2] = self.thrust(u[1]) - [F1, M1] = self.aero(quat, vel, rot, T1, u[2], w) - [F2, M2] = self.aero(quat, vel, rot, T2, u[3], w) + tau1 = N1 - (rot[0] + u[0]) * (INERTIA_PROP_X - INERTIA_PROP_N) * np.array([0, rot[2], -rot[1]]) + tau2 = N2 - (rot[0] + u[1]) * (INERTIA_PROP_X - INERTIA_PROP_N) * np.array([0, rot[2], -rot[1]]) - Fb = T1 + T2 + F1 + F2 - dvdt = np.array([0, 0, G]) + 1/MASS * quatrot(Fb, quatinv(quat)) + vinf = quatrot(vel - w, quat) + [F1, M1] = self.aero(vinf, rot, T1, u[2]) + [F2, M2] = self.aero(vinf, rot, T2, u[3]) - # angular velocity derivative - Ma = M1 + M2 + np.cross(P_A1_CG, F1) + np.cross(P_A2_CG, F2) + Fb = T1 + F1 + T2 + F2 + dvdt = np.array([0, 0, G]) + 1/MASS * quatrot(Fb, quatinv(quat)) - tau1 = N1 - (rot[0] + u[0]) * (INERTIA_PROP_X - INERTIA_PROP_N) * np.array([0, rot[2], -rot[1]]) - tau2 = N2 - (rot[0] + u[1]) * (INERTIA_PROP_X - INERTIA_PROP_N) * np.array([0, rot[2], -rot[1]]) - Mg = tau1 + tau2 + np.cross(P_P1_CG, T1) + np.cross(P_P2_CG, T2) - - Mb = Ma + Mg + Mb = M1 + tau1 + np.cross(P_A1_CG, F1) + np.cross(P_P1_CG, T1) + M2 + tau2 + np.cross(P_A2_CG, F2) + np.cross(P_P2_CG, T2) drdt = np.linalg.inv(INERTIA) @ (Mb - np.cross(rot, INERTIA@rot)) - # kinematic equations dpdt = vel dqdt = 1/2 * quatmul(quat, np.concatenate([[0], rot])) return np.concatenate([dpdt, dqdt, dvdt, drdt]) - def trim(self, vh, vz): + def trim(self, target): + vh, vz = target x = np.zeros(13) x[7] = vh x[9] = vz diff --git a/mavion/sac_mavion_1.zip b/mavion/sac_mavion_1.zip new file mode 100644 index 0000000000000000000000000000000000000000..c6f32c49fc5f05c1f750a783e2adb0c6ee250302 Binary files /dev/null and b/mavion/sac_mavion_1.zip differ diff --git a/mavion/train.py b/mavion/train.py new file mode 100644 index 0000000000000000000000000000000000000000..ecb71f3429e03bacef074816ee4a997f6fa1cc3a --- /dev/null +++ b/mavion/train.py @@ -0,0 +1,14 @@ +import numpy as np +from stable_baselines3 import SAC +from mavion_env import MavionEnv + + +if __name__ == "__main__": + + env = MavionEnv() + env.target = np.array([0, 0, -20, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]) + + model = SAC("MlpPolicy", env, verbose=1) + model.learn(total_timesteps=20000, log_interval=10) + model.save("sac_mavion_1") +