Merge pull request #4 from danielbinschmid/attitude
attitude action type feature and experiment utilizing it (follower_m.py)
danielbinschmid authored Jan 23, 2024
2 parents 4e49452 + df5ec28 commit 83e7c95
Showing 7 changed files with 265 additions and 19 deletions.
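For orientation before the file-by-file diff, here is a minimal usage sketch of the new attitude action type. The environment and trajectory construction mirror agents/follower_m.py below; the reset/step calls assume the Gymnasium-style API of BaseRLAviary, and the single action shown is a made-up example, not part of this commit.

import numpy as np
from aviaries.SimpleFollowerAviary import SimpleFollowerAviary
from gym_pybullet_drones.utils.enums import ObservationType, ActionType
from trajectories import TrajectoryFactory

# Single-waypoint target trajectory, as in follower_m.py.
wps = TrajectoryFactory.waypoints_from_numpy(np.asarray([[0, 0.5, 0.5]]))
traj = TrajectoryFactory.get_discr_from_wps(wps)

env = SimpleFollowerAviary(
    target_trajectory=traj,
    initial_xyzs=np.array([[0., 0., 0.1]]),
    obs=ObservationType('kin'),
    act=ActionType.ATTITUDE_PID,
)

obs, info = env.reset(seed=42)
# One ATTITUDE_PID action per drone: (thrust, roll rate, pitch rate, yaw rate),
# normalized to [-1, 1] and rescaled internally by _preprocessAction.
action = np.array([[0.1, 0.0, 0.0, 0.0]])
obs, reward, terminated, truncated, info = env.step(action)
env.close()

Training a PPO policy on top of exactly this environment is what follower_m.py does.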
170 changes: 170 additions & 0 deletions agents/follower_m.py
@@ -0,0 +1,170 @@
"""Script learns an agent to follow a target trajectory.
"""

import os
from datetime import datetime
import argparse
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.callbacks import EvalCallback, StopTrainingOnRewardThreshold
from aviaries.SimpleFollowerAviary import SimpleFollowerAviary
from aviaries.FollowerAviary import FollowerAviary
from gym_pybullet_drones.utils.utils import str2bool
from gym_pybullet_drones.utils.enums import ObservationType, ActionType
from trajectories import TrajectoryFactory, Waypoint, DiscretizedTrajectory
from agents.test_simple_follower import test_simple_follower

DEFAULT_GUI = True
DEFAULT_RECORD_VIDEO = False
DEFAULT_OUTPUT_FOLDER = 'results'
DEFAULT_COLAB = False

DEFAULT_OBS = ObservationType('kin') # 'kin' or 'rgb'
DEFAULT_ACT = ActionType.ATTITUDE_PID # 'rpm', 'pid', 'vel', 'one_d_rpm', 'one_d_pid' or 'attitude-pid'
DEFAULT_AGENTS = 2
DEFAULT_MA = False
DEFAULT_TIMESTEPS = 3e5

def run(multiagent=DEFAULT_MA, output_folder=DEFAULT_OUTPUT_FOLDER,
gui=DEFAULT_GUI, plot=True, colab=DEFAULT_COLAB,
record_video=DEFAULT_RECORD_VIDEO, local=True,
timesteps=DEFAULT_TIMESTEPS):

# CONFIG ##################################################

# target trajectory
t_wps = TrajectoryFactory.waypoints_from_numpy(
np.asarray([
[0, 0.5, 0.5],
])
)
initial_xyzs = np.array([[0., 0., 0.1]])
t_traj = TrajectoryFactory.get_discr_from_wps(t_wps)

# output path location
filename = os.path.join(output_folder, 'save-'+datetime.now().strftime("%m.%d.%Y_%H.%M.%S"))
if not os.path.exists(filename):
os.makedirs(filename+'/')

# target reward
if DEFAULT_ACT == ActionType.ONE_D_RPM:
target_reward = 474.15 if not multiagent else 949.5
else:
target_reward = 467. if not multiagent else 920.

# #########################################################

# ENVS ####################################################

if multiagent:
raise NotImplementedError("not implemented")

train_env = make_vec_env(
SimpleFollowerAviary,
env_kwargs=dict(
target_trajectory=t_traj,
initial_xyzs=initial_xyzs,
obs=DEFAULT_OBS,
act=DEFAULT_ACT
),
n_envs=20,
seed=0
)
eval_env = SimpleFollowerAviary(
target_trajectory=t_traj,
initial_xyzs=initial_xyzs,
obs=DEFAULT_OBS,
act=DEFAULT_ACT
)

# #########################################################

# SETUP ###################################################

# model
model = PPO('MlpPolicy',
train_env,
# tensorboard_log=filename+'/tb/',
verbose=1)

# callbacks
callback_on_best = StopTrainingOnRewardThreshold(
reward_threshold=target_reward,
verbose=1
)
eval_callback = EvalCallback(eval_env,
callback_on_new_best=callback_on_best,
verbose=1,
best_model_save_path=filename+'/',
log_path=filename+'/',
eval_freq=int(1000),
deterministic=True,
render=False)

# ##########################################################

print('[INFO] Action space:', train_env.action_space)
print('[INFO] Observation space:', train_env.observation_space)
print('[INFO] Number of timesteps:', timesteps)

# TRAIN ####################################################

# fit
model.learn(total_timesteps=timesteps,
callback=eval_callback,
log_interval=100)

# save model
model.save(filename+'/final_model.zip')
print(filename)

# print training progression
with np.load(filename+'/evaluations.npz') as data:
for j in range(data['timesteps'].shape[0]):
print(str(data['timesteps'][j])+","+str(data['results'][j][0]))

# ##########################################################

# TEST #####################################################

test_env = SimpleFollowerAviary(
target_trajectory=t_traj,
initial_xyzs=initial_xyzs,
gui=gui,
obs=DEFAULT_OBS,
act=DEFAULT_ACT,
record=record_video
)
test_env_nogui = SimpleFollowerAviary(
target_trajectory=t_traj,
initial_xyzs=initial_xyzs,
obs=DEFAULT_OBS,
act=DEFAULT_ACT
)

test_simple_follower(
local=local,
filename=filename,
test_env_nogui=test_env_nogui,
test_env=test_env,
output_folder=output_folder
)

# ##########################################################




if __name__ == '__main__':
#### Define and parse (optional) arguments for the script ##
parser = argparse.ArgumentParser(description='Single agent reinforcement learning example script')
parser.add_argument('--multiagent', default=DEFAULT_MA, type=str2bool, help='Whether to use example LeaderFollower instead of Hover (default: False)', metavar='')
parser.add_argument('--gui', default=DEFAULT_GUI, type=str2bool, help='Whether to use PyBullet GUI (default: True)', metavar='')
parser.add_argument('--record_video', default=DEFAULT_RECORD_VIDEO, type=str2bool, help='Whether to record a video (default: False)', metavar='')
parser.add_argument('--output_folder', default=DEFAULT_OUTPUT_FOLDER, type=str, help='Folder where to save logs (default: "results")', metavar='')
parser.add_argument('--colab', default=DEFAULT_COLAB, type=bool, help='Whether example is being run by a notebook (default: "False")', metavar='')
parser.add_argument('--timesteps', default=DEFAULT_TIMESTEPS, type=int, help='number of train timesteps before stopping', metavar='')
ARGS = parser.parse_args()

run(**vars(ARGS))
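As a complement to the training script above, a sketch of reloading the saved policy for a short rollout. It assumes stable-baselines3's PPO.load, a hypothetical results folder name, and the same Gymnasium-style step API; test_simple_follower (not shown in this diff) presumably performs a similar loop with logging and plotting.

import numpy as np
from stable_baselines3 import PPO
from aviaries.SimpleFollowerAviary import SimpleFollowerAviary
from gym_pybullet_drones.utils.enums import ObservationType, ActionType
from trajectories import TrajectoryFactory

# Same environment construction as in run().
wps = TrajectoryFactory.waypoints_from_numpy(np.asarray([[0, 0.5, 0.5]]))
traj = TrajectoryFactory.get_discr_from_wps(wps)
env = SimpleFollowerAviary(
    target_trajectory=traj,
    initial_xyzs=np.array([[0., 0., 0.1]]),
    obs=ObservationType('kin'),
    act=ActionType.ATTITUDE_PID,
    gui=True,
)

# Hypothetical run folder; run() prints the actual path it created.
model = PPO.load('results/save-01.23.2024_12.00.00/final_model.zip')

obs, info = env.reset(seed=0)
for _ in range(env.EPISODE_LEN_SEC * env.CTRL_FREQ):
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = env.step(action)
    if terminated or truncated:
        break
env.close()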
2 changes: 1 addition & 1 deletion aviaries/FollowerAviary.py
@@ -175,7 +175,7 @@ def _observationSpace(self):
act_lo = -1
act_hi = +1
for i in range(self.ACTION_BUFFER_SIZE):
if self.ACT_TYPE in [ActionType.RPM, ActionType.VEL]:
if self.ACT_TYPE in [ActionType.RPM, ActionType.VEL, ActionType.ATTITUDE_PID]:
obs_lower_bound = np.hstack([obs_lower_bound, np.array([[act_lo,act_lo,act_lo,act_lo]])])
obs_upper_bound = np.hstack([obs_upper_bound, np.array([[act_hi,act_hi,act_hi,act_hi]])])
elif self.ACT_TYPE==ActionType.PID:
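This keeps the kinematic observation layout unchanged and only widens the buffered-action block to four entries per past step for ATTITUDE_PID. A quick sanity check of the resulting per-drone width, assuming the 12-dimensional kinematic state and an ACTION_BUFFER_SIZE-deep action buffer as in BaseRLAviary (the buffer length used below is an assumption, not part of this commit):

def kin_obs_width(action_buffer_size: int, action_size: int = 4) -> int:
    # 12 kinematic entries (position, roll/pitch/yaw, linear and angular velocity)
    # plus one action_size-wide block per buffered past action.
    return 12 + action_size * action_buffer_size

# e.g. with a hypothetical buffer of 15 past actions:
assert kin_obs_width(15) == 72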
56 changes: 49 additions & 7 deletions aviaries/SimpleFollowerAviary.py
@@ -3,6 +3,8 @@
from gym_pybullet_drones.envs.BaseRLAviary import BaseRLAviary
from gym_pybullet_drones.utils.enums import DroneModel, Physics, ActionType, ObservationType
from trajectories import DiscretizedTrajectory
from typing import Dict, Any
import pybullet as p

class SimpleFollowerAviary(BaseRLAviary):
"""Single agent RL problem: hover at position."""
@@ -51,7 +53,7 @@ def __init__(self,
"""
self.INIT_XYZS = initial_xyzs
self.TARGET_POS = np.array([0,0.5,0.5])
self.TARGET_POS = target_trajectory[0].coordinate # np.array([0,0.5,0.5])
self.EPISODE_LEN_SEC = 8
super().__init__(drone_model=drone_model,
num_drones=1,
@@ -121,15 +123,55 @@ def _computeTruncated(self):

################################################################################

def _computeInfo(self):
"""Computes the current info dict(s).

Unused.

def _computeInfo(self) -> Dict[str, Any]:
"""
Any additional computation in the simulation loop.
Returns interesting information to the user as a dictionary.
"""
self._vis()

return {"answer": 42} #### Calculated by the Deep Thought supercomputer in 7.5M years

def _vis(self):
"""
Additional visualization in the target environment.
"""
# target waypoint visualization
sphere_visual = p.createVisualShape(shapeType=p.GEOM_SPHERE,
radius=0.03,
rgbaColor=[0, 1, 0, 1],
physicsClientId=self.CLIENT)
target = p.createMultiBody(baseMass=0.0,
baseCollisionShapeIndex=-1,
baseVisualShapeIndex=sphere_visual,
basePosition=self.TARGET_POS,
useMaximalCoordinates=False,
physicsClientId=self.CLIENT)
p.changeVisualShape(target,
-1,
rgbaColor=[0.9, 0.3, 0.3, 1],
physicsClientId=self.CLIENT)

def _computeObs(self):
"""Returns the current observation of the environment.
Returns
-------
dict[str, int]
Dummy value.
ndarray
A Box() of shape (NUM_DRONES,H,W,4) or (NUM_DRONES,12) depending on the observation type.
"""
return {"answer": 42} #### Calculated by the Deep Thought supercomputer in 7.5M years

# position, roll/pitch/yaw, linear and angular velocity observation
obs_12 = np.zeros((self.NUM_DRONES,12))
for i in range(self.NUM_DRONES):
obs = self._getDroneStateVector(i)
obs_12[i, :] = np.hstack([obs[0:3], obs[7:10], obs[10:13], obs[13:16]]).reshape(12,)
ret = np.array([obs_12[i, :] for i in range(self.NUM_DRONES)]).astype('float32')

# action buffer
for i in range(self.ACTION_BUFFER_SIZE):
ret = np.hstack([ret, np.array([self.action_buffer[i][j, :] for j in range(self.NUM_DRONES)])])

return ret

27 changes: 20 additions & 7 deletions gym_pybullet_drones/control/DSLPIDControl.py
@@ -88,7 +88,8 @@ def computeControl(self,
target_pos,
target_rpy=np.zeros(3),
target_vel=np.zeros(3),
target_rpy_rates=np.zeros(3)
target_rpy_rates=np.zeros(3),
target_thrust: float=None,
):
"""Computes the PID control action (as RPMs) for a single drone.
@@ -135,13 +136,25 @@
target_rpy,
target_vel
)
rpm = self._dslPIDAttitudeControl(control_timestep,
thrust,
cur_quat,
computed_target_rpy,
target_rpy_rates
)
cur_rpy = p.getEulerFromQuaternion(cur_quat)
if target_thrust is None:
rpm = self._dslPIDAttitudeControl(control_timestep,
thrust,
cur_quat,
computed_target_rpy,
target_rpy_rates
)
else:
# attitude-only control: convert the commanded RPM to the PWM-level thrust
# used by _dslPIDAttitudeControl, hold the current attitude and steer via
# the commanded body rates
target_thrust = (target_thrust - self.PWM2RPM_CONST) / self.PWM2RPM_SCALE
rpm = self._dslPIDAttitudeControl(
control_timestep=control_timestep,
thrust=target_thrust,
cur_quat=cur_quat,
target_euler=np.array(cur_rpy),
target_rpy_rates=target_rpy_rates
)

return rpm, pos_e, computed_target_rpy[2] - cur_rpy[2]

################################################################################
25 changes: 22 additions & 3 deletions gym_pybullet_drones/envs/BaseRLAviary.py
@@ -70,7 +70,7 @@ def __init__(self,
self.OBS_TYPE = obs
self.ACT_TYPE = act
#### Create integrated controllers #########################
if act in [ActionType.PID, ActionType.VEL, ActionType.ONE_D_PID]:
if act in [ActionType.PID, ActionType.VEL, ActionType.ONE_D_PID, ActionType.ATTITUDE_PID]:
os.environ['KMP_DUPLICATE_LIB_OK']='True'
if drone_model in [DroneModel.CF2X, DroneModel.CF2P]:
self.ctrl = [DSLPIDControl(drone_model=DroneModel.CF2X) for i in range(num_drones)]
@@ -138,7 +138,7 @@ def _actionSpace(self):
A Box of size NUM_DRONES x 4, 3, or 1, depending on the action type.

"""
if self.ACT_TYPE in [ActionType.RPM, ActionType.VEL]:
if self.ACT_TYPE in [ActionType.RPM, ActionType.VEL, ActionType.ATTITUDE_PID]:
size = 4
elif self.ACT_TYPE==ActionType.PID:
size = 3
@@ -233,6 +233,22 @@ def _preprocessAction(self,
target_pos=state[0:3]+0.1*np.array([0,0,target[0]])
)
rpm[k,:] = res
elif self.ACT_TYPE == ActionType.ATTITUDE_PID:
state = self._getDroneStateVector(k)
# first action component: normalized thrust in [-1, 1], mapped to an RPM setpoint within +-5% of hover
target_thrust = float(target[0])
target_thrust = np.array(self.HOVER_RPM * (1+0.05*target_thrust))
# remaining components: commanded roll, pitch and yaw rates
target_rpy_rates = target[1:4]
res, _, _ = self.ctrl[k].computeControl(
control_timestep=self.CTRL_TIMESTEP,
cur_pos=state[0:3],
cur_quat=state[3:7],
cur_vel=state[10:13],
cur_ang_vel=state[13:16],
target_pos=state[0:3],
target_rpy_rates=target_rpy_rates,
target_thrust=target_thrust
)
rpm[k,:] = res
else:
print("[ERROR] in BaseRLAviary._preprocessAction()")
exit()
@@ -265,7 +281,7 @@ def _observationSpace(self):
act_lo = -1
act_hi = +1
for i in range(self.ACTION_BUFFER_SIZE):
if self.ACT_TYPE in [ActionType.RPM, ActionType.VEL]:
if self.ACT_TYPE in [ActionType.RPM, ActionType.VEL, ActionType.ATTITUDE_PID]:
obs_lower_bound = np.hstack([obs_lower_bound, np.array([[act_lo,act_lo,act_lo,act_lo] for i in range(self.NUM_DRONES)])])
obs_upper_bound = np.hstack([obs_upper_bound, np.array([[act_hi,act_hi,act_hi,act_hi] for i in range(self.NUM_DRONES)])])
elif self.ACT_TYPE==ActionType.PID:
@@ -274,6 +290,9 @@ def _observationSpace(self):
elif self.ACT_TYPE in [ActionType.ONE_D_RPM, ActionType.ONE_D_PID]:
obs_lower_bound = np.hstack([obs_lower_bound, np.array([[act_lo] for i in range(self.NUM_DRONES)])])
obs_upper_bound = np.hstack([obs_upper_bound, np.array([[act_hi] for i in range(self.NUM_DRONES)])])
else:
print("[ERROR] in BaseRLAviary._observationSpace()")
exit()
return spaces.Box(low=obs_lower_bound, high=obs_upper_bound, dtype=np.float32)
############################################################
else:
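Taken together, the BaseRLAviary and DSLPIDControl changes rescale the first action component twice: once from the normalized policy output to an RPM setpoint, and once from RPM to the PWM-level thrust consumed by _dslPIDAttitudeControl. A small, self-contained trace of that path (the numeric values below are illustrative stand-ins; the real constants live in DSLPIDControl and the drone model, not in this diff):

def attitude_thrust_pipeline(a_thrust, hover_rpm, pwm2rpm_scale, pwm2rpm_const):
    """Trace the thrust component of an ATTITUDE_PID action through both rescalings."""
    # BaseRLAviary._preprocessAction: normalized thrust in [-1, 1] -> RPM setpoint (+-5% around hover)
    rpm_cmd = hover_rpm * (1 + 0.05 * a_thrust)
    # DSLPIDControl.computeControl: RPM setpoint -> PWM-level thrust for _dslPIDAttitudeControl
    pwm_thrust = (rpm_cmd - pwm2rpm_const) / pwm2rpm_scale
    return rpm_cmd, pwm_thrust

# Illustrative numbers only:
print(attitude_thrust_pipeline(a_thrust=0.2, hover_rpm=14000.0,
                               pwm2rpm_scale=0.2685, pwm2rpm_const=4070.3))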
3 changes: 2 additions & 1 deletion gym_pybullet_drones/utils/Logger.py
@@ -101,7 +101,8 @@ def log(self,

"""
if drone < 0 or drone >= self.NUM_DRONES or timestamp < 0 or len(state) != 20 or len(control) != 12:
print("[ERROR] in Logger.log(), invalid data")
print(f'[ERROR] in Logger.log(), invalid data. len(state)={len(state)}!=20 or len(control)={len(control)}!=12.')

current_counter = int(self.counters[drone])
#### Add rows to the matrices if a counter exceeds their size
if current_counter >= self.timestamps.shape[1]:
1 change: 1 addition & 0 deletions gym_pybullet_drones/utils/enums.py
@@ -39,6 +39,7 @@ class ActionType(Enum):
VEL = "vel" # Velocity input (using PID control)
ONE_D_RPM = "one_d_rpm" # 1D (identical input to all motors) with RPMs
ONE_D_PID = "one_d_pid" # 1D (identical input to all motors) with PID control
ATTITUDE_PID = "attitude-pid" # Attitude control (using only the attitude PID loop). The action value is a four-dimensional vector: (thrust, roll rate, pitch rate, yaw rate).

################################################################################

