# mcts_config.yml
use_tensorboard: true
log_time: true
layerwise_logging: true
use_cuda: false
scale_observation: true
version_string: 'mcts_v0'
dimension: 3
max_num_points: 10
max_value: 20 # Not sure if this impacts the game at all
max_grad_norm: 10
c_puct: 0.5
lr: 0.0000001
max_depth: 20
MSE_coefficient: 1.0
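# (Assumption, not stated in this file) c_puct above appears to be the PUCT exploration constant
# from AlphaZero-style MCTS, scaling an exploration bonus of the form
# c_puct * P(s,a) * sqrt(N(s)) / (1 + N(s,a)); MSE_coefficient presumably weights the value-head
# MSE term in the combined training loss.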
# MCTS only trains one player at a time.
# If host is specified, the agent will need to be passed during initialization, and vice versa.
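# Illustration only: 'MCTSTrainer' and its signature below are assumptions, not names from this repo.
#   cfg = yaml.safe_load(open('mcts_config.yml'))        # PyYAML
#   trainer = MCTSTrainer(cfg, agent=pretrained_agent)   # 'host' section present, so an agent is passed in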
host:
  # Note: Simulation is always run step-by-step, but the number of batches is used when training the policy net.
  batch_size: 1
  iterations: 200
  c_puct: 0.5
  lr: 0.00001
  max_depth: 20
  MSE_coefficient: 1.0
  initial_rollout_size: 100
  max_rollout_step: 10
  optim:
    name: 'adam'
    args:  # Pass optimizer parameters here
      lr: 0.000001
  lr_schedule:  # (OPTIONAL) Use a scheduler on the learning rate
    mode: 'exponential'
    initial_lr: 0.001
    rate: 0.996
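  # (Assumption based on the 'exponential' mode name, not verified against the training code)
  # the scheduled learning rate after step t is expected to follow initial_lr * rate ** t,
  # e.g. 0.001 * 0.996 ** 100 ≈ 6.7e-4; er_schedule below presumably decays er the same way.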
  er: 0.2  # Exploration rate
  er_schedule:  # (OPTIONAL) Use a scheduler on the exploration rate
    mode: 'exponential'
    initial_er: 0.5
    rate: 0.996
  net_arch: [ 32, { repeat: 2, net_arch: [ 32, 'b' ] }, 16 ]
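# (Assumption) in net_arch, integers appear to be hidden-layer widths, 'b' an auxiliary layer
# marker (e.g. batch norm), and { repeat: n, net_arch: [...] } seems to repeat the nested block
# n times, so the host net above would expand to roughly [ 32, 32, 'b', 32, 'b', 16 ].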
agent:
  batch_size: 256
  initial_rollout_size: 100
  max_rollout_step: 10
  c_puct: 0.5
  lr: 0.00001
  max_depth: 20
  MSE_coefficient: 1.0
  optim:
    name: 'adam'
    args:
      lr: 0.00000001
  lr_schedule:
    mode: 'exponential'
    initial_lr: 0.001
    rate: 0.996
  er: 0.2
  er_schedule:
    mode: 'exponential'
    initial_er: 0.5
    rate: 0.996
  net_arch: [ 128, 'b', { repeat: 20, net_arch: [ 256, 'b' ] }, 128, 'b' ]
replay_buffer:
  type: 'base'
  buffer_size: 0
  use_cuda: false