-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy path: torcsGame.py
89 lines (74 loc) · 2.71 KB
/
torcsGame.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/python3
"""Entry script: runs a steering-only TORCS experiment driven by a remote-synced Agent."""
from gym_torcs.gym_torcs import TorcsEnv
from gameAgent import Agent
import numpy as np
import os
import time
import random
import json
# import math

# Load experiment configuration. Use a context manager so the file handle
# is closed deterministically (the original `json.load(open(...))` leaked it).
with open("./torcs_central/config.json") as _config_file:
    config = json.load(_config_file)

vision = False                         # sensor-only observations, no camera input
episode_count = config['maxEpisodes']  # number of episodes to run
max_steps = config['maxSteps']         # per-episode step cap
reward = 0
done = False
step = 0                               # global step counter across all episodes

# Generate a Torcs environment (text mode, fixed speed, practice track).
env = TorcsEnv(vision=vision,
               throttle=False,
               default_speed=config['default_speed'],
               textMode=True, xmlPath='./gym_torcs/practice.xml')
agent = Agent(1, verbose=True)  # action dimension 1: steering only
# NOTE(review): `epsilon` is read from config but never used below — verify
# whether Agent consumes it elsewhere or it is dead configuration.
epsilon = config['exploration']
print("TORCS Experiment Start.")
# Initialize before the loop so the KeyboardInterrupt handler can always
# reference it — previously a Ctrl-C before the first episode body ran
# would raise NameError on `total_reward` inside the handler.
total_reward = 0.
for i in range(episode_count):
    try:
        print("Episode : " + str(i))
        agent.pullFromServer()  # sync latest model weights from the central server
        time.sleep(1)
        if np.mod(i, 3) == 0:
            # Sometimes you need to relaunch TORCS because of the memory leak error
            ob = env.reset(relaunch=True)
        else:
            ob = env.reset()
        total_reward = 0.
        # Random initial steering action drawn from a standard normal.
        action = np.array([np.random.normal(0, 1)])
        for j in range(max_steps):
            ob, reward, done, _ = env.step(action)
            # Shaped reward: forward speed projected on the track axis, minus
            # lateral speed, minus speed weighted by distance from track centre.
            reward = ob.speedX*np.cos(ob.angle) - np.abs(ob.speedX*np.sin(ob.angle)) - ob.speedX*np.abs(ob.trackPos)
            action = agent.act(env, ob, reward, done, vision)[0]
            total_reward += reward
            print("reward", reward)
            step += 1
            if done:
                print('-'*80, '\nDone\n', '-'*80)
                # agent.pushToServer()
                agent.dumpModels(metaData={'total_reward': total_reward,
                                           'steps_taken': step,
                                           'episode_done': i
                                           })
                time.sleep(1)
                break
        print("TOTAL REWARD @ " + str(i) + " -th Episode : " + str(total_reward))
        print("Total Step: " + str(step))
        print("")
        time.sleep(0.5)
    except KeyboardInterrupt:
        # Best-effort shutdown: kill TORCS and persist the current models
        # before exiting.
        print("process killed by user")
        os.system('pkill torcs')
        agent.dumpModels(metaData={'total_reward': total_reward,
                                   'steps_taken': step,
                                   'episode_done': i
                                   })
        quit()

env.end()  # This is for shutting down TORCS
print("Finish.")