|
| 1 | +# Tuned |
| 2 | +CartPole-v1: |
| 3 | + n_envs: 1 |
| 4 | + n_timesteps: !!float 5e4 |
| 5 | + policy: 'LinearPolicy' |
| 6 | + n_delta: 2 |
| 7 | + |
| 8 | +# Tuned |
| 9 | +Pendulum-v0: &pendulum-params |
| 10 | + n_envs: 1 |
| 11 | + n_timesteps: !!float 2e6 |
| 12 | + policy: 'MlpPolicy' |
| 13 | + normalize: "dict(norm_obs=True, norm_reward=False)" |
| 14 | + learning_rate: !!float 0.018 |
| 15 | + n_delta: 4 |
| 16 | + n_top: 1 |
| 17 | + delta_std: 0.1 |
| 18 | + policy_kwargs: "dict(net_arch=[16])" |
| 19 | + zero_policy: False |
| 20 | + |
| 21 | +# To be tuned
| 22 | +LunarLander-v2: |
| 23 | + <<: *pendulum-params |
| 24 | + n_delta: 6 |
| 25 | + n_top: 1 |
| 26 | + n_timesteps: !!float 2e6 |
| 27 | + |
| 28 | +# Tuned |
| 29 | +LunarLanderContinuous-v2: |
| 30 | + <<: *pendulum-params |
| 31 | + n_timesteps: !!float 2e6 |
| 32 | + |
| 33 | +# Tuned |
| 34 | +Acrobot-v1: |
| 35 | + <<: *pendulum-params |
| 36 | + n_timesteps: !!float 5e5 |
| 37 | + |
| 38 | +# Tuned |
| 39 | +MountainCar-v0: |
| 40 | + <<: *pendulum-params |
| 41 | + n_delta: 8 |
| 42 | + n_timesteps: !!float 5e5 |
| 43 | + |
| 44 | +# Tuned |
| 45 | +MountainCarContinuous-v0: |
| 46 | + <<: *pendulum-params |
| 47 | + n_timesteps: !!float 5e5 |
| 48 | + delta_std: 0.2 |
| 49 | + |
| 50 | +# === PyBullet Envs ===
| 51 | + |
| 52 | +# Almost tuned |
| 53 | +HalfCheetahBulletEnv-v0: &pybullet-defaults |
| 54 | + n_envs: 1 |
| 55 | + policy: 'MlpPolicy' |
| 56 | + n_timesteps: !!float 7.5e7 |
| 57 | + learning_rate: !!float 0.02 |
| 58 | + delta_std: !!float 0.03 |
| 59 | + n_delta: 8 |
| 60 | + n_top: 8 |
| 61 | + alive_bonus_offset: 0 |
| 62 | + normalize: "dict(norm_obs=True, norm_reward=False)" |
| 63 | + policy_kwargs: "dict(net_arch=[64, 64])" |
| 64 | + zero_policy: False |
| 65 | + |
| 66 | +# To be tuned |
| 67 | +AntBulletEnv-v0: |
| 68 | + n_envs: 1 |
| 69 | + policy: 'MlpPolicy' |
| 70 | + n_timesteps: !!float 7.5e7 |
| 71 | + learning_rate: !!float 0.02 |
| 72 | + delta_std: !!float 0.03 |
| 73 | + n_delta: 32 |
| 74 | + n_top: 32 |
| 75 | + alive_bonus_offset: 0 |
| 76 | + normalize: "dict(norm_obs=True, norm_reward=False)" |
| 77 | + policy_kwargs: "dict(net_arch=[128, 64])" |
| 78 | + zero_policy: False |
| 79 | + |
| 80 | + |
| 81 | +Walker2DBulletEnv-v0: |
| 82 | + policy: 'MlpPolicy' |
| 83 | + n_timesteps: !!float 7.5e7 |
| 84 | + learning_rate: !!float 0.03 |
| 85 | + delta_std: !!float 0.025 |
| 86 | + n_delta: 40 |
| 87 | + n_top: 30 |
| 88 | + alive_bonus_offset: -1 |
| 89 | + normalize: "dict(norm_obs=True, norm_reward=False)" |
| 90 | + policy_kwargs: "dict(net_arch=[64, 64])" |
| 91 | + zero_policy: False |
| 92 | + |
| 93 | +# Tuned |
| 94 | +HopperBulletEnv-v0: |
| 95 | + n_envs: 1 |
| 96 | + policy: 'LinearPolicy' |
| 97 | + n_timesteps: !!float 7e6 |
| 98 | + learning_rate: !!float 0.01 |
| 99 | + delta_std: !!float 0.025 |
| 100 | + n_delta: 8 |
| 101 | + n_top: 4 |
| 102 | + alive_bonus_offset: -1 |
| 103 | + normalize: "dict(norm_obs=True, norm_reward=False)" |
| 104 | + |
| 105 | +ReacherBulletEnv-v0: |
| 106 | + <<: *pybullet-defaults |
| 107 | + n_timesteps: !!float 1e6 |
| 108 | + |
| 109 | +# === MuJoCo Envs ===
| 110 | +# Params closest to original paper |
| 111 | +Swimmer-v3: |
| 112 | + n_envs: 1 |
| 113 | + policy: 'LinearPolicy' |
| 114 | + n_timesteps: !!float 2e6 |
| 115 | + learning_rate: !!float 0.02 |
| 116 | + delta_std: !!float 0.01 |
| 117 | + n_delta: 1 |
| 118 | + n_top: 1 |
| 119 | + alive_bonus_offset: 0 |
| 120 | + # normalize: "dict(norm_obs=True, norm_reward=False)" |
| 121 | + |
| 122 | +Hopper-v3: |
| 123 | + n_envs: 1 |
| 124 | + policy: 'LinearPolicy' |
| 125 | + n_timesteps: !!float 7e6 |
| 126 | + learning_rate: !!float 0.01 |
| 127 | + delta_std: !!float 0.025 |
| 128 | + n_delta: 8 |
| 129 | + n_top: 4 |
| 130 | + alive_bonus_offset: -1 |
| 131 | + normalize: "dict(norm_obs=True, norm_reward=False)" |
| 132 | + |
| 133 | +HalfCheetah-v3: |
| 134 | + n_envs: 1 |
| 135 | + policy: 'LinearPolicy' |
| 136 | + n_timesteps: !!float 1.25e7 |
| 137 | + learning_rate: !!float 0.02 |
| 138 | + delta_std: !!float 0.03 |
| 139 | + n_delta: 32 |
| 140 | + n_top: 4 |
| 141 | + alive_bonus_offset: 0 |
| 142 | + normalize: "dict(norm_obs=True, norm_reward=False)" |
| 143 | + |
| 144 | +Walker2d-v3: |
| 145 | + n_envs: 1 |
| 146 | + policy: 'LinearPolicy' |
| 147 | + n_timesteps: !!float 7.5e7 |
| 148 | + learning_rate: !!float 0.03 |
| 149 | + delta_std: !!float 0.025 |
| 150 | + n_delta: 40 |
| 151 | + n_top: 30 |
| 152 | + alive_bonus_offset: -1 |
| 153 | + normalize: "dict(norm_obs=True, norm_reward=False)" |
| 154 | + |
| 155 | +Ant-v3: |
| 156 | + n_envs: 1 |
| 157 | + policy: 'LinearPolicy' |
| 158 | + n_timesteps: !!float 7.5e7 |
| 159 | + learning_rate: !!float 0.015 |
| 160 | + delta_std: !!float 0.025 |
| 161 | + n_delta: 60 |
| 162 | + n_top: 20 |
| 163 | + alive_bonus_offset: -1 |
| 164 | + normalize: "dict(norm_obs=True, norm_reward=False)" |
| 165 | + |
| 166 | + |
| 167 | +Humanoid-v3: |
| 168 | + n_envs: 1 |
| 169 | + policy: 'LinearPolicy' |
| 170 | + n_timesteps: !!float 2.5e8 |
| 171 | + learning_rate: 0.02 |
| 172 | + delta_std: 0.0075 |
| 173 | + n_delta: 256 |
| 174 | + n_top: 256 |
| 175 | + alive_bonus_offset: -5 |
| 176 | + normalize: "dict(norm_obs=True, norm_reward=False)" |
| 177 | + |
| 178 | +# Almost tuned |
| 179 | +BipedalWalker-v3: |
| 180 | + n_envs: 1 |
| 181 | + policy: 'MlpPolicy' |
| 182 | + n_timesteps: !!float 1e8 |
| 183 | + learning_rate: 0.02 |
| 184 | + delta_std: 0.0075 |
| 185 | + n_delta: 64 |
| 186 | + n_top: 32 |
| 187 | + alive_bonus_offset: -0.1 |
| 188 | + normalize: "dict(norm_obs=True, norm_reward=False)" |
| 189 | + policy_kwargs: "dict(net_arch=[16])" |
| 190 | + |
| 191 | +# To be tuned
| 192 | +BipedalWalkerHardcore-v3: |
| 193 | + n_envs: 1 |
| 194 | + policy: 'MlpPolicy' |
| 195 | + n_timesteps: !!float 5e8 |
| 196 | + learning_rate: 0.02 |
| 197 | + delta_std: 0.0075 |
| 198 | + n_delta: 64 |
| 199 | + n_top: 32 |
| 200 | + alive_bonus_offset: -0.1 |
| 201 | + normalize: "dict(norm_obs=True, norm_reward=False)" |
| 202 | + policy_kwargs: "dict(net_arch=[16])" |
| 203 | + |
| 204 | +A1Walking-v0: |
| 205 | + <<: *pendulum-params |
| 206 | + n_timesteps: !!float 2e6 |
| 207 | + |
| 208 | +A1Jumping-v0: |
| 209 | + policy: 'LinearPolicy' |
| 210 | + n_timesteps: !!float 7.5e7 |
| 211 | + learning_rate: !!float 0.03 |
| 212 | + delta_std: !!float 0.025 |
| 213 | + n_delta: 40 |
| 214 | + n_top: 30 |
| 215 | + # alive_bonus_offset: -1 |
| 216 | + normalize: "dict(norm_obs=True, norm_reward=False)" |
| 217 | + # policy_kwargs: "dict(net_arch=[16])" |
| 218 | + |
0 commit comments