Skip to content

Commit bbd7199

Browse files
committed
Improve default and MuJoCo configs.
1 parent e897ab1 commit bbd7199

File tree

2 files changed

+23
-18
lines changed

2 files changed

+23
-18
lines changed

agents/scripts/configs.py

+22 -17
Original file line number | Diff line number | Diff line change
@@ -28,24 +28,24 @@ def default():
2828
"""Default configuration for PPO."""
2929
# General
3030
algorithm = ppo.PPOAlgorithm
31-
num_agents = 10
32-
eval_episodes = 25
31+
num_agents = 30
32+
eval_episodes = 30
3333
use_gpu = False
3434
# Network
3535
network = networks.feed_forward_gaussian
3636
weight_summaries = dict(
3737
all=r'.*', policy=r'.*/policy/.*', value=r'.*/value/.*')
3838
policy_layers = 200, 100
3939
value_layers = 200, 100
40-
init_mean_factor = 0.05
40+
init_mean_factor = 0.1
4141
init_logstd = -1
4242
# Optimization
4343
update_every = 30
4444
update_epochs = 25
4545
optimizer = 'AdamOptimizer'
4646
learning_rate = 1e-4
4747
# Losses
48-
discount = 0.985
48+
discount = 0.995
4949
kl_target = 1e-2
5050
kl_cutoff_factor = 2
5151
kl_cutoff_coef = 1000
@@ -59,35 +59,38 @@ def pendulum():
5959
# Environment
6060
env = 'Pendulum-v0'
6161
max_length = 200
62-
steps = 1e6 # 1M
62+
steps = 2e6 # 2M
6363
return locals()
6464

6565

66-
def cheetah():
67-
"""Configuration for MuJoCo's half cheetah task."""
66+
def reacher():
67+
"""Configuration for MuJoCo's reacher task."""
6868
locals().update(default())
6969
# Environment
70-
env = 'HalfCheetah-v1'
70+
env = 'Reacher-v1'
7171
max_length = 1000
72-
steps = 1e7 # 10M
72+
steps = 5e6 # 5M
73+
discount = 0.985
74+
update_every = 60
7375
return locals()
7476

7577

76-
def walker():
77-
"""Configuration for MuJoCo's walker task."""
78+
def cheetah():
79+
"""Configuration for MuJoCo's half cheetah task."""
7880
locals().update(default())
7981
# Environment
80-
env = 'Walker2d-v1'
82+
env = 'HalfCheetah-v1'
8183
max_length = 1000
8284
steps = 1e7 # 10M
85+
discount = 0.99
8386
return locals()
8487

8588

86-
def reacher():
87-
"""Configuration for MuJoCo's reacher task."""
89+
def walker():
90+
"""Configuration for MuJoCo's walker task."""
8891
locals().update(default())
8992
# Environment
90-
env = 'Reacher-v1'
93+
env = 'Walker2d-v1'
9194
max_length = 1000
9295
steps = 1e7 # 10M
9396
return locals()
@@ -99,7 +102,8 @@ def hopper():
99102
# Environment
100103
env = 'Hopper-v1'
101104
max_length = 1000
102-
steps = 2e7 # 20M
105+
steps = 1e7 # 10M
106+
update_every = 60
103107
return locals()
104108

105109

@@ -109,7 +113,7 @@ def ant():
109113
# Environment
110114
env = 'Ant-v1'
111115
max_length = 1000
112-
steps = 5e7 # 50M
116+
steps = 2e7 # 20M
113117
return locals()
114118

115119

@@ -120,4 +124,5 @@ def humanoid():
120124
env = 'Humanoid-v1'
121125
max_length = 1000
122126
steps = 5e7 # 50M
127+
update_every = 60
123128
return locals()

setup.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@
2323

2424
setuptools.setup(
2525
name='agents',
26-
version='1.1.0',
26+
version='1.2.0',
2727
description=(
2828
'Efficient TensorFlow implementation of reinforcement learning '
2929
'algorithms.'),

0 commit comments

Comments
 (0)