File tree 2 files changed +23
-18
lines changed
2 files changed +23
-18
lines changed Original file line number Diff line number Diff line change @@ -28,24 +28,24 @@ def default():
28
28
"""Default configuration for PPO."""
29
29
# General
30
30
algorithm = ppo .PPOAlgorithm
31
- num_agents = 10
32
- eval_episodes = 25
31
+ num_agents = 30
32
+ eval_episodes = 30
33
33
use_gpu = False
34
34
# Network
35
35
network = networks .feed_forward_gaussian
36
36
weight_summaries = dict (
37
37
all = r'.*' , policy = r'.*/policy/.*' , value = r'.*/value/.*' )
38
38
policy_layers = 200 , 100
39
39
value_layers = 200 , 100
40
- init_mean_factor = 0.05
40
+ init_mean_factor = 0.1
41
41
init_logstd = - 1
42
42
# Optimization
43
43
update_every = 30
44
44
update_epochs = 25
45
45
optimizer = 'AdamOptimizer'
46
46
learning_rate = 1e-4
47
47
# Losses
48
- discount = 0.985
48
+ discount = 0.995
49
49
kl_target = 1e-2
50
50
kl_cutoff_factor = 2
51
51
kl_cutoff_coef = 1000
@@ -59,35 +59,38 @@ def pendulum():
59
59
# Environment
60
60
env = 'Pendulum-v0'
61
61
max_length = 200
62
- steps = 1e6 # 1M
62
+ steps = 2e6 # 2M
63
63
return locals ()
64
64
65
65
66
- def cheetah():
67
- """Configuration for MuJoCo's half cheetah task."""
66
+ def reacher():
67
+ """Configuration for MuJoCo's reacher task."""
68
68
locals ().update (default ())
69
69
# Environment
70
- env = 'HalfCheetah-v1'
70
+ env = 'Reacher-v1'
71
71
max_length = 1000
72
- steps = 1e7 # 10M
72
+ steps = 5e6 # 5M
73
+ discount = 0.985
74
+ update_every = 60
73
75
return locals ()
74
76
75
77
76
- def walker():
77
- """Configuration for MuJoCo's walker task."""
78
+ def cheetah():
79
+ """Configuration for MuJoCo's half cheetah task."""
78
80
locals ().update (default ())
79
81
# Environment
80
- env = 'Walker2d-v1'
82
+ env = 'HalfCheetah-v1'
81
83
max_length = 1000
82
84
steps = 1e7 # 10M
85
+ discount = 0.99
83
86
return locals ()
84
87
85
88
86
- def reacher():
87
- """Configuration for MuJoCo's reacher task."""
89
+ def walker():
90
+ """Configuration for MuJoCo's walker task."""
88
91
locals ().update (default ())
89
92
# Environment
90
- env = 'Reacher-v1'
93
+ env = 'Walker2d-v1'
91
94
max_length = 1000
92
95
steps = 1e7 # 10M
93
96
return locals ()
@@ -99,7 +102,8 @@ def hopper():
99
102
# Environment
100
103
env = 'Hopper-v1'
101
104
max_length = 1000
102
- steps = 2e7 # 20M
105
+ steps = 1e7 # 10M
106
+ update_every = 60
103
107
return locals ()
104
108
105
109
@@ -109,7 +113,7 @@ def ant():
109
113
# Environment
110
114
env = 'Ant-v1'
111
115
max_length = 1000
112
- steps = 5e7 # 50M
116
+ steps = 2e7 # 20M
113
117
return locals ()
114
118
115
119
@@ -120,4 +124,5 @@ def humanoid():
120
124
env = 'Humanoid-v1'
121
125
max_length = 1000
122
126
steps = 5e7 # 50M
127
+ update_every = 60
123
128
return locals ()
Original file line number Diff line number Diff line change 23
23
24
24
setuptools .setup (
25
25
name = 'agents' ,
26
- version = '1.1.0',
26
+ version = '1.2.0',
27
27
description = (
28
28
'Efficient TensorFlow implementation of reinforcement learning '
29
29
'algorithms.' ),
You can’t perform that action at this time.
0 commit comments