danielbinschmid · danielbinschmid · Jan 23, 2024 · Jan 23, 2024
diff --git a/agents/follower.py b/agents/follower.py
@@ -37,7 +37,6 @@
 DEFAULT_GUI = True
 DEFAULT_RECORD_VIDEO = False
 DEFAULT_OUTPUT_FOLDER = 'results'
-
 DEFAULT_OBS = ObservationType('kin') # 'kin' or 'rgb'
 DEFAULT_ACT = ActionType('vel') # 'rpm' or 'pid' or 'vel' or 'one_d_rpm' or 'one_d_pid'
 DEFAULT_AGENTS = 1
@@ -51,10 +50,11 @@ def run(output_folder=DEFAULT_OUTPUT_FOLDER, gui=DEFAULT_GUI, plot=True, record_
     train_env = make_vec_env(
         FollowerAviary,
         env_kwargs=dict(obs=DEFAULT_OBS, act=DEFAULT_ACT),
-        n_envs=20,
+        n_envs=1,
         seed=0
     )
-    eval_env = FollowerAviary(obs=DEFAULT_OBS, act=DEFAULT_ACT)
+    initial_xyzs = np.array([[0.,     0.,     0.1125]])
+    eval_env = FollowerAviary(obs=DEFAULT_OBS, act=DEFAULT_ACT, initial_xyzs=initial_xyzs)
 
     #### Check the environment's spaces ########################
     print('[INFO] Action space:', train_env.action_space)
@@ -85,7 +85,7 @@ def run(output_folder=DEFAULT_OUTPUT_FOLDER, gui=DEFAULT_GUI, plot=True, record_
         render=True
     )
     model.learn(
-        total_timesteps=int(5e5) if local else int(1e2), # shorter training in GitHub Actions pytest
+        total_timesteps=int(1e4) if local else int(1e2), # shorter training in GitHub Actions pytest
         callback=eval_callback,
         log_interval=100
     )
@@ -176,5 +176,5 @@ def run(output_folder=DEFAULT_OUTPUT_FOLDER, gui=DEFAULT_GUI, plot=True, record_
     parser.add_argument('--record_video',       default=DEFAULT_RECORD_VIDEO,  type=str2bool,      help='Whether to record a video (default: False)', metavar='')
     parser.add_argument('--output_folder',      default=DEFAULT_OUTPUT_FOLDER, type=str,           help='Folder where to save logs (default: "results")', metavar='')
     ARGS = parser.parse_args()
-
+    os.makedirs(DEFAULT_OUTPUT_FOLDER, exist_ok=True)
     run(**vars(ARGS))
diff --git a/aviaries/FollowerAviary.py b/aviaries/FollowerAviary.py
@@ -7,6 +7,7 @@
 
 from gym_pybullet_drones.control.DSLPIDControl import DSLPIDControl
 from trajectories import TrajectoryFactory
+from trajectories import Waypoint
 
 class FollowerAviary(BaseRLAviary):
     """Single agent RL problem: hover at position."""
@@ -15,7 +16,7 @@ class FollowerAviary(BaseRLAviary):
 
     def __init__(self,
                  drone_model: DroneModel=DroneModel.CF2X,
-                 initial_xyzs=None,
+                 initial_xyzs: np.ndarray = np.array([[0.,     0.,     0.1125]]),
                  initial_rpys=None,
                  physics: Physics=Physics.PYB,
                  pyb_freq: int = 240,
@@ -33,7 +34,7 @@ def __init__(self,
         ----------
         drone_model : DroneModel, optional
             The desired drone type (detailed in an .urdf file in folder `assets`).
-        initial_xyzs: ndarray | None, optional
+        initial_xyzs: ndarray
             (NUM_DRONES, 3)-shaped array containing the initial XYZ position of the drones.
         initial_rpys: ndarray | None, optional
             (NUM_DRONES, 3)-shaped array containing the initial orientations of the drones (in radians).
@@ -53,11 +54,21 @@ def __init__(self,
             The type of action space (1 or 3D; RPMS, thurst and torques, or waypoint with PID control)
 
         """
-        self.TARGET_POS = np.array([0,0,1])
+
         self.EPISODE_LEN_SEC = 16
         self.NUM_DRONES = 1
         self.WAYPOINT_BUFFER_SIZE = 3 # how many steps into future to interpolate
-        self.trajectory = TrajectoryFactory.get_linear_square_traj_discretized()
+
+        self.INIT_XYZS = initial_xyzs
+        starting_waypoint = self.INIT_XYZS[0].copy()
+        starting_waypoint[2] = starting_waypoint[2] + 0.1
+        starting_waypoint = Waypoint(
+            coordinate=starting_waypoint,
+            timestamp=0.0
+        )
+        self.trajectory = TrajectoryFactory.get_simple_smooth_trajectory(starting_waypoint, n_points_discretization_level=4)
+        self.TARGET_POS = self.trajectory[len(self.trajectory) - 1]
+
         self.n_waypoints = len(self.trajectory)
         self.current_waypoint_idx = 0
 
@@ -92,10 +103,15 @@ def _computeReward(self):
             The reward.
 
         """
+        alpha = .5
+        beta = 0.0025
+
         state = self._getDroneStateVector(0)
         waypoint = self.waypoint_buffer[self.current_waypoint_idx]
-        ret = max(0, 2 - np.linalg.norm(waypoint-state[0:3])**4)
-        return ret
+        reached_pos_reward = 1 if np.linalg.norm(waypoint-state[0:3]) < 0.001 else 0
+        distance_reward = max(0, 2 - np.linalg.norm(waypoint-state[0:3])**4)
+        distance_reward = alpha * distance_reward
+        return alpha * reached_pos_reward + beta * distance_reward
 
     ################################################################################
 
@@ -109,7 +125,7 @@ def _computeTerminated(self):
 
         """
         state = self._getDroneStateVector(0)
-        if np.linalg.norm(self.TARGET_POS-state[0:3]) < .0001:
+        if np.linalg.norm(self.TARGET_POS.coordinate-state[0:3]) < .0001:
             return True
         else:
             return False
@@ -197,9 +213,10 @@ def _observationSpace(self):
     def update_waypoints(self):
         drone_position = self._getDroneStateVector(0)[0:3]
         current_waypoint = self.waypoint_buffer[self.current_waypoint_idx]
-        if np.linalg.norm(drone_position - current_waypoint) < .1:
+        if np.linalg.norm(drone_position - current_waypoint) < .001:
             # replace reached waypoint with the waypoint that follows after all waypoints in the buffer
-            next_waypoint = self.trajectory.get_waypoint((self.current_waypoint_idx + len(self.waypoint_buffer)) % len(self.trajectory))
+            next_waypoint_idx = int(self.current_waypoint_idx + len(self.waypoint_buffer)) % len(self.trajectory)
+            next_waypoint = self.trajectory[next_waypoint_idx].coordinate
             self.waypoint_buffer[self.current_waypoint_idx] = next_waypoint
             # set next waypoint
             self.current_waypoint_idx = (self.current_waypoint_idx + 1) % self.WAYPOINT_BUFFER_SIZE

diff --git a/gym_pybullet_drones/utils/Logger.py b/gym_pybullet_drones/utils/Logger.py
@@ -373,7 +373,4 @@ def plot(self, pwm=False):
                             wspace=0.15,
                             hspace=0.0
                             )
-        if self.COLAB: 
-            plt.savefig(os.path.join('results', 'output_figure.png'))
-        else:
-            plt.show()
+        plt.savefig(os.path.join('results', 'output_figure.pdf'))
diff --git a/runnables/pid_follow_trajectory.py b/runnables/pid_follow_trajectory.py
@@ -22,7 +22,7 @@
 import time
 import argparse
 import numpy as np
-from trajectories import TrajectoryFactory
+from trajectories import TrajectoryFactory, Waypoint
 
 from gym_pybullet_drones.utils.enums import DroneModel, Physics
 from gym_pybullet_drones.envs.CtrlAviary import CtrlAviary
@@ -60,11 +60,19 @@ def run(
         colab=DEFAULT_COLAB
         ):
     #### Initialize the simulation #############################
-    INIT_XYZS = np.array([[0., 0., 0.5]])
+    INIT_XYZS = np.array([[0., 0., 0.1125]])
     INIT_RPYS = np.array([[0., 0., 0.]])
 
     #### Initialize a polynomial trajectory ####################
-    target_trajectory = TrajectoryFactory.get_pol_discretized_trajectory()
+    starting_waypoint = INIT_XYZS[0].copy()
+    starting_waypoint[2] = starting_waypoint[2] + 0.1
+    starting_waypoint = Waypoint(
+        coordinate=starting_waypoint,
+        timestamp=0.0
+    )
+    target_trajectory = TrajectoryFactory.get_simple_smooth_trajectory(starting_waypoint)
+    for wp in range(len(target_trajectory)):
+        print(target_trajectory[wp].coordinate)
     n_waypoints = len(target_trajectory)
     cur_waypoint_idx = 0
 
@@ -98,6 +106,11 @@ def run(
     #### Run the simulation
     action = np.zeros((num_drones,4))
     START = time.time()
+
+    final_coordinate = target_trajectory[len(target_trajectory) - 1].coordinate
+
+    done = False
+    # print(final_coordinate)
     for i in range(int(1e5)):
 
         #### Step the simulation ###################################
@@ -115,12 +128,19 @@ def run(
 
         #### Go to the next way point and loop #####################
         position = obs[0][0:3]
-
+        distance2final = np.linalg.norm(final_coordinate - position)
         distance = np.linalg.norm(target_position - position)
         velocity = np.linalg.norm(obs[0][10:13])
-        if distance < 0.1:
+
+        if distance2final < 0.1 and cur_waypoint_idx >= len(target_trajectory) - 5:
+            done = True
+
+        if distance < 0.1 and not done:
             cur_waypoint_idx = (cur_waypoint_idx + 1) % n_waypoints
 
+
+        # if distance2final < 0.1:
+        #     break
         ##### Log the simulation ####################################
 
         #### Printout

diff --git a/trajectories/trajectory_factory.py b/trajectories/trajectory_factory.py
@@ -10,41 +10,43 @@
 class WAYPOINT_BOOTH:
     T_WAYPOINTS_POLY = [
          Waypoint(
-              coordinate=np.asarray([0, 0, 0]),
-              timestamp=0
+              coordinate=np.asarray([0, 0, 0.25]),
+              timestamp=1
          ),
          Waypoint(
-              coordinate=np.asarray([0, 1, 0.25]),
+              coordinate=np.asarray([0, 0, 0.25]),
               timestamp=2
          ),
          Waypoint(
-              coordinate=np.asarray([1, 1, 0.5]),
-              timestamp=4
+              coordinate=np.asarray([0, 0, 0.5]),
+              timestamp=3
          ),
          Waypoint(
-              coordinate=np.asarray([1, 0, 0.75]),
-              timestamp=8
+              coordinate=np.asarray([0, 0, 0.25]),
+              timestamp=4
          ),
          Waypoint(
-              coordinate=np.asarray([0, 0, 1]),
-              timestamp=10
-         ), 
+              coordinate=np.asarray([0, 0, 0]),
+              timestamp=5
+         )
+    ]
+    T_WAYPOINTS_POLY_ALL = [
          Waypoint(
-              coordinate=np.asarray([1,1,1]),
-              timestamp=12
+              coordinate=np.asarray([0, 0, 0]),
+              timestamp=0
          ),
          Waypoint(
-              coordinate=np.asarray([1, 0, 0.75]),
-              timestamp=14
+              coordinate=np.asarray([0, 0.5, 0.15]),
+              timestamp=1
          ),
          Waypoint(
-              coordinate=np.asarray([1, 1, 0.5]),
-              timestamp=16
+              coordinate=np.asarray([0.25, 0.25, 0.075]),
+              timestamp=2
          ),
          Waypoint(
               coordinate=np.asarray([0, 0, 0]),
-              timestamp=18
-         ),
+              timestamp=3
+         )
     ]
 
 
@@ -71,12 +73,24 @@ def get_linear_square_traj_discretized(cls, n_discretization_level: int=100, squ
     def get_pol_discretized_trajectory(cls, t_waypoints: Optional[List[Waypoint]]=None, n_points_discretization_level: Optional[int] = None) -> PolynomialDiscretizedTrajectory:
 
         if t_waypoints is None:
-            t_waypoints = WAYPOINT_BOOTH.T_WAYPOINTS_POLY
+            t_waypoints = WAYPOINT_BOOTH.T_WAYPOINTS_POLY_ALL
         if n_points_discretization_level is None: 
             n_points_discretization_level = 100
 
         return PolynomialDiscretizedTrajectory(
             t_waypoints,
             n_points_discretization_level
         )
+
+    @classmethod
+    def get_simple_smooth_trajectory(cls, starting_waypoint: Waypoint, t_waypoints: Optional[List[Waypoint]]=None, n_points_discretization_level: Optional[int] = None) -> DiscretizedTrajectory:
+        if t_waypoints is None:
+            t_waypoints = [starting_waypoint] + WAYPOINT_BOOTH.T_WAYPOINTS_POLY
+        if n_points_discretization_level is None: 
+            n_points_discretization_level = 100
+
+        return PolynomialDiscretizedTrajectory(
+            t_waypoints,
+            n_points_discretization_level
+        )