Merge pull request #4 from danielbinschmid/attitude
attitude action type feature and experiment utilizing it (follower_m.py)
danielbinschmid authored Jan 23, 2024
2 parents 4e49452 + df5ec28 commit 83e7c95
Showing 7 changed files with 265 additions and 19 deletions.
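For orientation before the file-by-file diff, here is a minimal usage sketch of the new attitude action type. The environment and trajectory construction mirror agents/follower_m.py below; the reset/step calls assume the Gymnasium-style API of BaseRLAviary, and the single action shown is a made-up example, not part of this commit.

import numpy as np
from aviaries.SimpleFollowerAviary import SimpleFollowerAviary
from gym_pybullet_drones.utils.enums import ObservationType, ActionType
from trajectories import TrajectoryFactory

# Single-waypoint target trajectory, as in follower_m.py.
wps = TrajectoryFactory.waypoints_from_numpy(np.asarray([[0, 0.5, 0.5]]))
traj = TrajectoryFactory.get_discr_from_wps(wps)

env = SimpleFollowerAviary(
    target_trajectory=traj,
    initial_xyzs=np.array([[0., 0., 0.1]]),
    obs=ObservationType('kin'),
    act=ActionType.ATTITUDE_PID,
)

obs, info = env.reset(seed=42)
# One ATTITUDE_PID action per drone: (thrust, roll rate, pitch rate, yaw rate),
# normalized to [-1, 1] and rescaled internally by _preprocessAction.
action = np.array([[0.1, 0.0, 0.0, 0.0]])
obs, reward, terminated, truncated, info = env.step(action)
env.close()

Training a PPO policy on top of exactly this environment is what follower_m.py does.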
170 changes: 170 additions & 0 deletions agents/follower_m.py
@@ -0,0 +1,170 @@
"""Script learns an agent to follow a target trajectory.
"""

import os
from datetime import datetime
import argparse
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.callbacks import EvalCallback, StopTrainingOnRewardThreshold
from aviaries.SimpleFollowerAviary import SimpleFollowerAviary
from aviaries.FollowerAviary import FollowerAviary
from gym_pybullet_drones.utils.utils import str2bool
from gym_pybullet_drones.utils.enums import ObservationType, ActionType
from trajectories import TrajectoryFactory, Waypoint, DiscretizedTrajectory
from agents.test_simple_follower import test_simple_follower

DEFAULT_GUI = True
DEFAULT_RECORD_VIDEO = False
DEFAULT_OUTPUT_FOLDER = 'results'
DEFAULT_COLAB = False

DEFAULT_OBS = ObservationType('kin') # 'kin' or 'rgb'
DEFAULT_ACT = ActionType.ATTITUDE_PID # 'rpm', 'pid', 'vel', 'one_d_rpm', 'one_d_pid' or 'attitude-pid'
DEFAULT_AGENTS = 2
DEFAULT_MA = False
DEFAULT_TIMESTEPS = 3e5

def run(multiagent=DEFAULT_MA, output_folder=DEFAULT_OUTPUT_FOLDER,
gui=DEFAULT_GUI, plot=True, colab=DEFAULT_COLAB,
record_video=DEFAULT_RECORD_VIDEO, local=True,
timesteps=DEFAULT_TIMESTEPS):

# CONFIG ##################################################

# target trajectory
t_wps = TrajectoryFactory.waypoints_from_numpy(
np.asarray([
[0, 0.5, 0.5],
])
)
initial_xyzs = np.array([[0., 0., 0.1]])
t_traj = TrajectoryFactory.get_discr_from_wps(t_wps)

# output path location
filename = os.path.join(output_folder, 'save-'+datetime.now().strftime("%m.%d.%Y_%H.%M.%S"))
if not os.path.exists(filename):
os.makedirs(filename+'/')

# target reward
if DEFAULT_ACT == ActionType.ONE_D_RPM:
target_reward = 474.15 if not multiagent else 949.5
else:
target_reward = 467. if not multiagent else 920.

# #########################################################

# ENVS ####################################################

if multiagent:
raise NotImplementedError("not implemented")

train_env = make_vec_env(
SimpleFollowerAviary,
env_kwargs=dict(
target_trajectory=t_traj,
initial_xyzs=initial_xyzs,
obs=DEFAULT_OBS,
act=DEFAULT_ACT
),
n_envs=20,
seed=0
)
eval_env = SimpleFollowerAviary(
target_trajectory=t_traj,
initial_xyzs=initial_xyzs,
obs=DEFAULT_OBS,
act=DEFAULT_ACT
)

# #########################################################

# SETUP ###################################################

# model
model = PPO('MlpPolicy',
train_env,
# tensorboard_log=filename+'/tb/',
verbose=1)

# callbacks
callback_on_best = StopTrainingOnRewardThreshold(
reward_threshold=target_reward,
verbose=1
)
eval_callback = EvalCallback(eval_env,
callback_on_new_best=callback_on_best,
verbose=1,
best_model_save_path=filename+'/',
log_path=filename+'/',
eval_freq=int(1000),
deterministic=True,
render=False)

# ##########################################################

print('[INFO] Action space:', train_env.action_space)
print('[INFO] Observation space:', train_env.observation_space)
print('[INFO] Number of timesteps:', timesteps)

# TRAIN ####################################################

# fit
model.learn(total_timesteps=timesteps,
callback=eval_callback,
log_interval=100)

# save model
model.save(filename+'/final_model.zip')
print(filename)

# print training progression
with np.load(filename+'/evaluations.npz') as data:
for j in range(data['timesteps'].shape[0]):
print(str(data['timesteps'][j])+","+str(data['results'][j][0]))

# ##########################################################

# TEST #####################################################

test_env = SimpleFollowerAviary(
target_trajectory=t_traj,
initial_xyzs=initial_xyzs,
gui=gui,
obs=DEFAULT_OBS,
act=DEFAULT_ACT,
record=record_video
)
test_env_nogui = SimpleFollowerAviary(
target_trajectory=t_traj,
initial_xyzs=initial_xyzs,
obs=DEFAULT_OBS,
act=DEFAULT_ACT
)

test_simple_follower(
local=local,
filename=filename,
test_env_nogui=test_env_nogui,
test_env=test_env,
output_folder=output_folder
)

# ##########################################################




if __name__ == '__main__':
#### Define and parse (optional) arguments for the script ##
parser = argparse.ArgumentParser(description='Single agent reinforcement learning example script')
parser.add_argument('--multiagent', default=DEFAULT_MA, type=str2bool, help='Whether to use example LeaderFollower instead of Hover (default: False)', metavar='')
parser.add_argument('--gui', default=DEFAULT_GUI, type=str2bool, help='Whether to use PyBullet GUI (default: True)', metavar='')
parser.add_argument('--record_video', default=DEFAULT_RECORD_VIDEO, type=str2bool, help='Whether to record a video (default: False)', metavar='')
parser.add_argument('--output_folder', default=DEFAULT_OUTPUT_FOLDER, type=str, help='Folder where to save logs (default: "results")', metavar='')
parser.add_argument('--colab', default=DEFAULT_COLAB, type=bool, help='Whether example is being run by a notebook (default: "False")', metavar='')
parser.add_argument('--timesteps', default=DEFAULT_TIMESTEPS, type=int, help='number of train timesteps before stopping', metavar='')
ARGS = parser.parse_args()

run(**vars(ARGS))
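As a complement to the training script above, a sketch of reloading the saved policy for a short rollout. It assumes stable-baselines3's PPO.load, a hypothetical results folder name, and the same Gymnasium-style step API; test_simple_follower (not shown in this diff) presumably performs a similar loop with logging and plotting.

import numpy as np
from stable_baselines3 import PPO
from aviaries.SimpleFollowerAviary import SimpleFollowerAviary
from gym_pybullet_drones.utils.enums import ObservationType, ActionType
from trajectories import TrajectoryFactory

# Same environment construction as in run().
wps = TrajectoryFactory.waypoints_from_numpy(np.asarray([[0, 0.5, 0.5]]))
traj = TrajectoryFactory.get_discr_from_wps(wps)
env = SimpleFollowerAviary(
    target_trajectory=traj,
    initial_xyzs=np.array([[0., 0., 0.1]]),
    obs=ObservationType('kin'),
    act=ActionType.ATTITUDE_PID,
    gui=True,
)

# Hypothetical run folder; run() prints the actual path it created.
model = PPO.load('results/save-01.23.2024_12.00.00/final_model.zip')

obs, info = env.reset(seed=0)
for _ in range(env.EPISODE_LEN_SEC * env.CTRL_FREQ):
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = env.step(action)
    if terminated or truncated:
        break
env.close()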
2 changes: 1 addition & 1 deletion aviaries/FollowerAviary.py
@@ -175,7 +175,7 @@ def _observationSpace(self):
act_lo = -1
act_hi = +1
for i in range(self.ACTION_BUFFER_SIZE):
if self.ACT_TYPE in [ActionType.RPM, ActionType.VEL]:
if self.ACT_TYPE in [ActionType.RPM, ActionType.VEL, ActionType.ATTITUDE_PID]:
obs_lower_bound = np.hstack([obs_lower_bound, np.array([[act_lo,act_lo,act_lo,act_lo]])])
obs_upper_bound = np.hstack([obs_upper_bound, np.array([[act_hi,act_hi,act_hi,act_hi]])])
elif self.ACT_TYPE==ActionType.PID:
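This keeps the kinematic observation layout unchanged and only widens the buffered-action block to four entries per past step for ATTITUDE_PID. A quick sanity check of the resulting per-drone width, assuming the 12-dimensional kinematic state and an ACTION_BUFFER_SIZE-deep action buffer as in BaseRLAviary (the buffer length used below is an assumption, not part of this commit):

def kin_obs_width(action_buffer_size: int, action_size: int = 4) -> int:
    # 12 kinematic entries (position, roll/pitch/yaw, linear and angular velocity)
    # plus one action_size-wide block per buffered past action.
    return 12 + action_size * action_buffer_size

# e.g. with a hypothetical buffer of 15 past actions:
assert kin_obs_width(15) == 72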
56 changes: 49 additions & 7 deletions aviaries/SimpleFollowerAviary.py
@@ -3,6 +3,8 @@
from gym_pybullet_drones.envs.BaseRLAviary import BaseRLAviary
from gym_pybullet_drones.utils.enums import DroneModel, Physics, ActionType, ObservationType
from trajectories import DiscretizedTrajectory
from typing import Dict, Any
import pybullet as p

class SimpleFollowerAviary(BaseRLAviary):
"""Single agent RL problem: hover at position."""
@@ -51,7 +53,7 @@ def __init__(self,
"""
self.INIT_XYZS = initial_xyzs
self.TARGET_POS = np.array([0,0.5,0.5])
self.TARGET_POS = target_trajectory[0].coordinate # np.array([0,0.5,0.5])
self.EPISODE_LEN_SEC = 8
super().__init__(drone_model=drone_model,
num_drones=1,
@@ -121,15 +123,55 @@ def _computeTruncated(self):

################################################################################

def _computeInfo(self):
"""Computes the current info dict(s).

Unused.

def _computeInfo(self) -> Dict[str, Any]:
"""
Any additional computation in the simulation loop.
Returns interesting information to the user as a dictionary.
"""
self._vis()

return {"answer": 42} #### Calculated by the Deep Thought supercomputer in 7.5M years

def _vis(self):
"""
Additional visualization in the target environment.
"""
# target waypoint visualization
sphere_visual = p.createVisualShape(shapeType=p.GEOM_SPHERE,
radius=0.03,
rgbaColor=[0, 1, 0, 1],
physicsClientId=self.CLIENT)
target = p.createMultiBody(baseMass=0.0,
baseCollisionShapeIndex=-1,
baseVisualShapeIndex=sphere_visual,
basePosition=self.TARGET_POS,
useMaximalCoordinates=False,
physicsClientId=self.CLIENT)
p.changeVisualShape(target,
-1,
rgbaColor=[0.9, 0.3, 0.3, 1],
physicsClientId=self.CLIENT)

def _computeObs(self):
"""Returns the current observation of the environment.
Returns
-------
dict[str, int]
Dummy value.
ndarray
A Box() of shape (NUM_DRONES,H,W,4) or (NUM_DRONES,12) depending on the observation type.
"""
return {"answer": 42} #### Calculated by the Deep Thought supercomputer in 7.5M years

# position, roll/pitch/yaw, linear and angular velocity observation
obs_12 = np.zeros((self.NUM_DRONES,12))
for i in range(self.NUM_DRONES):
obs = self._getDroneStateVector(i)
obs_12[i, :] = np.hstack([obs[0:3], obs[7:10], obs[10:13], obs[13:16]]).reshape(12,)
ret = np.array([obs_12[i, :] for i in range(self.NUM_DRONES)]).astype('float32')

# action buffer
for i in range(self.ACTION_BUFFER_SIZE):
ret = np.hstack([ret, np.array([self.action_buffer[i][j, :] for j in range(self.NUM_DRONES)])])

return ret

27 changes: 20 additions & 7 deletions gym_pybullet_drones/control/DSLPIDControl.py
@@ -88,7 +88,8 @@ def computeControl(self,
target_pos,
target_rpy=np.zeros(3),
target_vel=np.zeros(3),
target_rpy_rates=np.zeros(3)
target_rpy_rates=np.zeros(3),
target_thrust: float=None,
):
"""Computes the PID control action (as RPMs) for a single drone.
@@ -135,13 +136,25 @@
target_rpy,
target_vel
)
rpm = self._dslPIDAttitudeControl(control_timestep,
thrust,
cur_quat,
computed_target_rpy,
target_rpy_rates
)
cur_rpy = p.getEulerFromQuaternion(cur_quat)
if target_thrust is None:
rpm = self._dslPIDAttitudeControl(control_timestep,
thrust,
cur_quat,
computed_target_rpy,
target_rpy_rates
)
else:
# attitude-only control: convert the commanded RPM to the PWM-level thrust
# used by _dslPIDAttitudeControl, hold the current attitude and steer via
# the commanded body rates
target_thrust = (target_thrust - self.PWM2RPM_CONST) / self.PWM2RPM_SCALE
rpm = self._dslPIDAttitudeControl(
control_timestep=control_timestep,
thrust=target_thrust,
cur_quat=cur_quat,
target_euler=np.array(cur_rpy),
target_rpy_rates=target_rpy_rates
)

return rpm, pos_e, computed_target_rpy[2] - cur_rpy[2]

################################################################################
25 changes: 22 additions & 3 deletions gym_pybullet_drones/envs/BaseRLAviary.py
@@ -70,7 +70,7 @@ def __init__(self,
self.OBS_TYPE = obs
self.ACT_TYPE = act
#### Create integrated controllers #########################
if act in [ActionType.PID, ActionType.VEL, ActionType.ONE_D_PID]:
if act in [ActionType.PID, ActionType.VEL, ActionType.ONE_D_PID, ActionType.ATTITUDE_PID]:
os.environ['KMP_DUPLICATE_LIB_OK']='True'
if drone_model in [DroneModel.CF2X, DroneModel.CF2P]:
self.ctrl = [DSLPIDControl(drone_model=DroneModel.CF2X) for i in range(num_drones)]
@@ -138,7 +138,7 @@ def _actionSpace(self):
A Box of size NUM_DRONES x 4, 3, or 1, depending on the action type.

"""
if self.ACT_TYPE in [ActionType.RPM, ActionType.VEL]:
if self.ACT_TYPE in [ActionType.RPM, ActionType.VEL, ActionType.ATTITUDE_PID]:
size = 4
elif self.ACT_TYPE==ActionType.PID:
size = 3
@@ -233,6 +233,22 @@ def _preprocessAction(self,
target_pos=state[0:3]+0.1*np.array([0,0,target[0]])
)
rpm[k,:] = res
elif self.ACT_TYPE == ActionType.ATTITUDE_PID:
state = self._getDroneStateVector(k)
# first action component: normalized thrust in [-1, 1], mapped to an RPM setpoint within +-5% of hover
target_thrust = float(target[0])
target_thrust = np.array(self.HOVER_RPM * (1+0.05*target_thrust))
# remaining components: commanded roll, pitch and yaw rates
target_rpy_rates = target[1:4]
res, _, _ = self.ctrl[k].computeControl(
control_timestep=self.CTRL_TIMESTEP,
cur_pos=state[0:3],
cur_quat=state[3:7],
cur_vel=state[10:13],
cur_ang_vel=state[13:16],
target_pos=state[0:3],
target_rpy_rates=target_rpy_rates,
target_thrust=target_thrust
)
rpm[k,:] = res
else:
print("[ERROR] in BaseRLAviary._preprocessAction()")
exit()
@@ -265,7 +281,7 @@ def _observationSpace(self):
act_lo = -1
act_hi = +1
for i in range(self.ACTION_BUFFER_SIZE):
if self.ACT_TYPE in [ActionType.RPM, ActionType.VEL]:
if self.ACT_TYPE in [ActionType.RPM, ActionType.VEL, ActionType.ATTITUDE_PID]:
obs_lower_bound = np.hstack([obs_lower_bound, np.array([[act_lo,act_lo,act_lo,act_lo] for i in range(self.NUM_DRONES)])])
obs_upper_bound = np.hstack([obs_upper_bound, np.array([[act_hi,act_hi,act_hi,act_hi] for i in range(self.NUM_DRONES)])])
elif self.ACT_TYPE==ActionType.PID:
@@ -274,6 +290,9 @@ def _observationSpace(self):
elif self.ACT_TYPE in [ActionType.ONE_D_RPM, ActionType.ONE_D_PID]:
obs_lower_bound = np.hstack([obs_lower_bound, np.array([[act_lo] for i in range(self.NUM_DRONES)])])
obs_upper_bound = np.hstack([obs_upper_bound, np.array([[act_hi] for i in range(self.NUM_DRONES)])])
else:
print("[ERROR] in BaseRLAviary._observationSpace()")
exit()
return spaces.Box(low=obs_lower_bound, high=obs_upper_bound, dtype=np.float32)
############################################################
else:
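Taken together, the BaseRLAviary and DSLPIDControl changes rescale the first action component twice: once from the normalized policy output to an RPM setpoint, and once from RPM to the PWM-level thrust consumed by _dslPIDAttitudeControl. A small, self-contained trace of that path (the numeric values below are illustrative stand-ins; the real constants live in DSLPIDControl and the drone model, not in this diff):

def attitude_thrust_pipeline(a_thrust, hover_rpm, pwm2rpm_scale, pwm2rpm_const):
    """Trace the thrust component of an ATTITUDE_PID action through both rescalings."""
    # BaseRLAviary._preprocessAction: normalized thrust in [-1, 1] -> RPM setpoint (+-5% around hover)
    rpm_cmd = hover_rpm * (1 + 0.05 * a_thrust)
    # DSLPIDControl.computeControl: RPM setpoint -> PWM-level thrust for _dslPIDAttitudeControl
    pwm_thrust = (rpm_cmd - pwm2rpm_const) / pwm2rpm_scale
    return rpm_cmd, pwm_thrust

# Illustrative numbers only:
print(attitude_thrust_pipeline(a_thrust=0.2, hover_rpm=14000.0,
                               pwm2rpm_scale=0.2685, pwm2rpm_const=4070.3))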
3 changes: 2 additions & 1 deletion gym_pybullet_drones/utils/Logger.py
@@ -101,7 +101,8 @@ def log(self,

"""
if drone < 0 or drone >= self.NUM_DRONES or timestamp < 0 or len(state) != 20 or len(control) != 12:
print("[ERROR] in Logger.log(), invalid data")
print(f'[ERROR] in Logger.log(), invalid data. len(state)={len(state)}!=20 or len(control)={len(control)}!=12.')

current_counter = int(self.counters[drone])
#### Add rows to the matrices if a counter exceeds their size
if current_counter >= self.timestamps.shape[1]:
1 change: 1 addition & 0 deletions gym_pybullet_drones/utils/enums.py
@@ -39,6 +39,7 @@ class ActionType(Enum):
VEL = "vel" # Velocity input (using PID control)
ONE_D_RPM = "one_d_rpm" # 1D (identical input to all motors) with RPMs
ONE_D_PID = "one_d_pid" # 1D (identical input to all motors) with PID control
ATTITUDE_PID = "attitude-pid" # Attitude control (using only the attitude PID loop). The action value is a four-dimensional vector: (thrust, roll rate, pitch rate, yaw rate).

################################################################################

