Agent.py
import numpy as np
from keras.models import load_model

from Brain import Brain
from ReplayBuffer import ReplayBuffer

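# ---------------------------------------------------------------------------
# Note: Brain and ReplayBuffer are project-local modules not shown in this
# file. From how they are used below, they are assumed to expose roughly the
# following interfaces (an inferred sketch, not the actual implementations):
#
#   Brain(input_dims, n_actions, alpha, batch_size)
#       .predict(states)        -> Q-values, shape (batch, n_actions)
#       .train(states, targets) -> fits the underlying Keras model
#       .copy_weights(other)    -> copies weights from another Brain
#       .model                  -> the underlying Keras model
#
#   ReplayBuffer(mem_size, input_dims, n_actions, discrete=True)
#       .store_transition(state, action, reward, new_state, done)
#       .sample_buffer(batch_size) -> (states, one-hot actions, rewards,
#                                      new_states, non-terminal mask)
#       .mem_cntr               -> number of stored transitions
# ---------------------------------------------------------------------------
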
class Agent(object):
    """Agent interacting with and learning from the environment."""

    def __init__(self, alpha, gamma, n_actions, epsilon, batch_size,
                 input_dims, epsilon_dec, epsilon_min,
                 mem_size, replace_target, fname='model/model.keras'):
        """
        Initialize the agent.

        Args:
            alpha (float): Learning rate.
            gamma (float): Discount factor.
            n_actions (int): Number of possible actions.
            epsilon (float): Initial exploration rate.
            batch_size (int): Batch size for training the model.
            input_dims (tuple): Dimensions of the input state.
            epsilon_dec (float): Multiplicative decay factor applied to
                epsilon after each learning step.
            epsilon_min (float): Minimum value of epsilon.
            mem_size (int): Size of the memory buffer.
            replace_target (int): Interval at which the target network
                weights should be replaced with the evaluation network
                weights.
            fname (str): File name for saving and loading the model.
        """
        self.action_space = [i for i in range(n_actions)]
        self.n_actions = n_actions
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_dec = epsilon_dec
        self.epsilon_min = epsilon_min
        self.batch_size = batch_size
        self.model_file = fname
        self.replace_target = replace_target
        self.memory = ReplayBuffer(mem_size, input_dims, n_actions,
                                   discrete=True)
        self.brain_eval = Brain(input_dims, n_actions, alpha, batch_size)
        self.brain_target = Brain(input_dims, n_actions, alpha, batch_size)
    def remember(self, state, action, reward, new_state, done):
        """Store a transition in the memory buffer."""
        self.memory.store_transition(state, action, reward, new_state, done)

    def get_action(self, state):
        """Return an epsilon-greedy action for the current state."""
        state = np.array(state)
        state = state[np.newaxis, :]  # add a batch dimension

        # With probability epsilon explore at random; otherwise act greedily
        # on the evaluation network's Q-value estimates.
        if np.random.random() < self.epsilon:
            action = np.random.choice(self.action_space)
        else:
            actions = self.brain_eval.predict(state)
            action = np.argmax(actions)

        return action
    def learn(self):
        """Train the evaluation network on a batch sampled from memory."""
        if self.memory.mem_cntr > self.batch_size:
            state, action, reward, new_state, done = (
                self.memory.sample_buffer(self.batch_size))

            # Actions are stored one-hot; recover their integer indices.
            action_values = np.array(self.action_space, dtype=np.int8)
            action_indices = np.dot(action, action_values).astype(int)

            # Double DQN update: the evaluation network selects the next
            # action, the target network evaluates it.
            q_next = self.brain_target.predict(new_state)
            q_eval = self.brain_eval.predict(new_state)
            q_pred = self.brain_eval.predict(state)
            max_actions = np.argmax(q_eval, axis=1)

            # Copy so the update does not mutate q_pred in place.
            q_target = q_pred.copy()
            batch_index = np.arange(self.batch_size, dtype=np.int32)

            # `done` is assumed to be the non-terminal mask returned by
            # ReplayBuffer (i.e. 1 - terminal), so bootstrapping is zeroed
            # out on terminal transitions.
            q_target[batch_index, action_indices] = (
                reward
                + self.gamma * q_next[batch_index, max_actions] * done
            )

            _ = self.brain_eval.train(state, q_target)

            # Decay exploration once learning has begun.
            self.epsilon = max(self.epsilon * self.epsilon_dec,
                               self.epsilon_min)
    def update_network_parameters(self):
        """Update the target network with the parameters of the evaluation network."""
        self.brain_target.copy_weights(self.brain_eval)

    def save_model(self):
        """Save the evaluation model to a file."""
        self.brain_eval.model.save(self.model_file)

    def load_model(self):
        """Load the model from a file into both networks."""
        self.brain_eval.model = load_model(self.model_file)
        self.brain_target.model = load_model(self.model_file)

        # When running purely greedily (no exploration), make sure the
        # target network weights match the freshly loaded evaluation weights.
        if self.epsilon == 0.0:
            self.update_network_parameters()
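

# ---------------------------------------------------------------------------
# Usage sketch (not part of the original file): a minimal training loop,
# assuming a classic Gym-style environment whose step() returns
# (state, reward, done, info). The environment name and all hyperparameter
# values below are illustrative placeholders, and `replace_target` is used
# here as an episode interval, since this file stores it but never consumes
# it itself.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    import gym

    env = gym.make('CartPole-v1')
    agent = Agent(alpha=0.0005, gamma=0.99, n_actions=env.action_space.n,
                  epsilon=1.0, batch_size=64,
                  input_dims=env.observation_space.shape,
                  epsilon_dec=0.996, epsilon_min=0.01,
                  mem_size=100000, replace_target=100)

    for episode in range(500):
        state = env.reset()
        done = False
        score = 0
        while not done:
            action = agent.get_action(state)
            new_state, reward, done, _ = env.step(action)
            agent.remember(state, action, reward, new_state, done)
            agent.learn()
            state = new_state
            score += reward

        # Periodically sync the target network with the evaluation network.
        if episode % agent.replace_target == 0:
            agent.update_network_parameters()

        print('episode', episode, 'score', score)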