from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import csv
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam
from keras.callbacks import CSVLogger
from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
from pysc2.agents import base_agent
from pysc2.lib import actions
from pysc2.lib import features
from pysc2.env import sc2_env
from absl import app
import rl.core
from gym import spaces
_PLAYER_SELF = features.PlayerRelative.SELF
_PLAYER_NEUTRAL = features.PlayerRelative.NEUTRAL # beacon/minerals
_PLAYER_ENEMY = features.PlayerRelative.ENEMY

class MoveToBeacon_KerasRL(base_agent.BaseAgent):
    # Scripted test agent that cycles the marine between four fixed corners.
    # (Currently broken, since the env's action format changed.)
    def __init__(self):
        super().__init__()
        self.cnt = 0

    def step(self, obs):
        # super().step(obs)
        if self.cnt == 0:
            self.cnt += 1
            return (5, 5)
        elif self.cnt == 1:
            self.cnt += 1
            return (5, 50)
        elif self.cnt == 2:
            self.cnt += 1
            return (50, 50)
        else:
            self.cnt = 0
            return (50, 5)

class PySC2ToKerasRL_env(rl.core.Env):
    # Converts PySC2 env outputs to the inputs keras-rl agents expect.
    def __init__(self, PySC2_env):
        self.env = PySC2_env
        '''Move to Beacon'''
        # Define the action and observation spaces for the MoveToBeacon map.
        # Actions are flattened (x, y) minimap coordinates; observations are
        # the (row, col) centroid of the beacon on the 64x64 minimap.
        self.action_space = spaces.Box(np.array(1024), np.array((64 ** 2) - 1024))  # flattened action space
        self.observation_space = spaces.Box(np.zeros(2),
                                            np.array([63, 63]))  # (row, col) of the beacon target
        self.action_space.n = 64 ** 2  # keras-rl's DQNAgent reads .n as the number of discrete actions
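        # An alternative sketch (not what this script uses): gym's Discrete space
        # expresses the same flattened 64*64 action set directly and already
        # provides .n:
        # self.action_space = spaces.Discrete(64 ** 2)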

    def step(self, step_actions):
        """Run one timestep of the environment's dynamics.
        Accepts an action and returns a tuple (observation, reward, done, info).
        # Arguments
            action (object): An action provided by the agent.
        # Returns
            observation (object): Agent's observation of the current environment.
            reward (float): Amount of reward returned after the previous action.
            done (boolean): Whether the episode has ended, in which case further step() calls will return undefined results.
            info (dict): Contains auxiliary diagnostic information (helpful for debugging, and sometimes learning).
        """
        # Map the flattened action index to (x, y) minimap coordinates (0-63, 0-63)
        # and issue a move command for the marine.
        x = step_actions % 64
        y = step_actions // 64
        step_action = [actions.FUNCTIONS.Move_minimap("now", (x, y))]
        try:
            timesteps = self.env.step(step_action)  # step the underlying PySC2 env
        except ValueError:
            # Move_minimap is unavailable until the marine is selected, so select the army instead.
            timesteps = self.env.step([actions.FUNCTIONS.select_army('select')])
        obs = timesteps[0].observation  # retrieve the updated observation
        obs = obs.feature_minimap.player_relative  # 64x64 array with values 0-4
        obs = np.where(obs == _PLAYER_NEUTRAL)  # pixels belonging to the beacon
        obs = np.array([np.mean(obs[0]), np.mean(obs[1])])  # beacon centroid (row, col)
        reward = timesteps[0].reward  # get reward
        done = timesteps[0].last()  # check whether this was the last step of the episode
        info = {'items': 1}  # dummy dict entry to keep keras-rl happy
        return obs, reward, done, info
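
    # Example of the flattened index <-> (x, y) mapping used in step(), with a
    # hypothetical index value, shown only as a sanity check:
    #   idx = 2080
    #   x, y = idx % 64, idx // 64   # -> (32, 32), the centre of the 64x64 minimap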

    def reset(self):
        """Resets the state of the environment and returns an initial observation.
        # Returns
            observation (object): The initial observation of the space. Initial reward is assumed to be 0.
        """
        self.env.reset()  # reset the underlying PySC2 env
        reset_action = [actions.FUNCTIONS.select_army('select')]  # select the marine
        timesteps = self.env.step(reset_action)
        obs = timesteps[0].observation  # retrieve the initial observation
        # Format the observation: the beacon centroid on the 64x64 minimap.
        obs = obs.feature_minimap.player_relative  # 64x64 array with values 0-4
        obs = np.where(obs == _PLAYER_NEUTRAL)
        obs = np.array([np.mean(obs[0]), np.mean(obs[1])])
        return obs
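
    # A toy sketch of the centroid extraction above (hypothetical 4x4 array,
    # beacon pixels marked with _PLAYER_NEUTRAL == 3):
    #   toy = np.zeros((4, 4)); toy[1:3, 2:4] = 3
    #   ys, xs = np.where(toy == 3)
    #   centroid = np.array([np.mean(ys), np.mean(xs)])   # -> [1.5, 2.5]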

    def render(self, mode='human', close=False):
        pass  # nothing to render here; PySC2 drives its own visualization

    def close(self):
        self.env.close()
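
# A minimal usage sketch of the wrapper with a random policy (commented out;
# the DQN setup in main() below is what this script actually runs):
#   keras_env = PySC2ToKerasRL_env(env)
#   obs = keras_env.reset()
#   done = False
#   while not done:
#       action = np.random.randint(64 ** 2)  # random flattened (x, y) index
#       obs, reward, done, info = keras_env.step(action)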

def main(unused_argv):
    try:
        while True:
            with sc2_env.SC2Env(map_name="MoveToBeacon",
                                players=[sc2_env.Agent(sc2_env.Race.terran)],
                                agent_interface_format=features.AgentInterfaceFormat(
                                    feature_dimensions=features.Dimensions(screen=84, minimap=64),
                                    # default sizes of the feature screen and feature minimap
                                    use_feature_units=True),
                                step_mul=64,  # 16 gives roughly 150 APM (8 would give 300 APM);
                                # a larger step_mul makes the game run faster
                                game_steps_per_episode=0,
                                visualize=True) as env:
                # Create a keras-rl compatible env around the PySC2 env.
                keras_env = PySC2ToKerasRL_env(env)
                obs = keras_env.reset()

                # Create an agent that can interact with it.
                # Scripted test agent (makes the marine run in a circle):
                # keras_agent = MoveToBeacon_KerasRL()
                # keras_agent.reset()
                # while True:  # play the game
                #     step_actions = keras_agent.step(obs)
                #     obs, reward, done, info = keras_env.step(step_actions)
                # Replace the scripted agent with a learning one.
                # A simple model (taken from the keras-rl CartPole DQN example).
                nb_actions = keras_env.action_space.n
                model = Sequential()
                model.add(Flatten(input_shape=(1,) + keras_env.observation_space.shape))
                model.add(Dense(16))
                model.add(Activation('relu'))
                model.add(Dense(16))
                model.add(Activation('relu'))
                model.add(Dense(16))
                model.add(Activation('relu'))
                model.add(Dense(nb_actions))
                model.add(Activation('linear'))
                print(model.summary())
                output_filename = "DQN_Rewards_smallerObs_smallerActions.csv"
                # Some other model (an alternative configuration, kept commented out):
                # model = Sequential()
                # model.add(Flatten(input_shape=(1,) + keras_env.observation_space.shape))
                # model.add(Dense(16))
                # model.add(Activation('relu'))
                # model.add(Dense(16))
                # model.add(Activation('relu'))
                # model.add(Dense(16))
                # model.add(Activation('relu'))
                # model.add(Dense(nb_actions))
                # model.add(Activation('linear'))
                # print(model.summary())
                # output_filename = "DQN Rewards.csv"
                # Finally, configure and compile the agent. Any built-in Keras
                # optimizer and metrics can be used here.
                memory = SequentialMemory(limit=50000, window_length=1)
                policy = BoltzmannQPolicy()
                dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=15,
                               target_model_update=1e-2, policy=policy)
                dqn.compile(Adam(lr=1e-3), metrics=['mae'])
                # Okay, now it's time to learn something! (hopefully)
                hist = dqn.fit(keras_env, nb_steps=50000, visualize=False, verbose=2)
                with open(output_filename, 'w+', newline='') as csvfile:  # save the rewards over time
                    writer = csv.writer(csvfile)
                    writer.writerow(hist.history.get('episode_reward'))
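
                # A possible follow-up sketch (commented out; keras-rl's standard
                # save/evaluate calls with a hypothetical weights filename, not
                # exercised by this script):
                # dqn.save_weights('dqn_MoveToBeacon_weights.h5f', overwrite=True)
                # dqn.test(keras_env, nb_episodes=5, visualize=False)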
                break  # kill the env
    except KeyboardInterrupt:
        pass


if __name__ == "__main__":
    app.run(main)