cart_poll.py
# Deep Q-learning on CartPole with keras-rl (keras-rl2) and OpenAI Gym.
# NOTE: this targets the pre-0.26 Gym API (reset() -> obs, step() -> 4-tuple).
import gym
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

def build_model(states, actions):
    """Simple MLP mapping a (1, states) observation window to Q-values."""
    model = Sequential()
    model.add(Flatten(input_shape=(1, states)))  # window_length=1, so flatten (1, states)
    model.add(Dense(24, activation="relu"))
    model.add(Dense(24, activation="relu"))
    model.add(Dense(actions, activation="linear"))  # one Q-value per action
    return model

def build_agent(model, actions):
    """Wrap the model in a keras-rl DQN agent with Boltzmann exploration."""
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(
        model=model,
        memory=memory,
        policy=policy,
        nb_actions=actions,
        nb_steps_warmup=10,
        target_model_update=1e-2,  # soft target-network updates
    )
    return dqn
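
# Optional sketch, not part of the original script: keras-rl also ships an
# epsilon-greedy policy, so swapping out Boltzmann exploration is a
# one-line change if desired.
# from rl.policy import EpsGreedyQPolicy
# policy = EpsGreedyQPolicy(eps=0.1)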

env = gym.make("CartPole-v0")
states = env.observation_space.shape[0]  # 4 observation features
actions = env.action_space.n             # 2 discrete actions (left, right)
print("States:", states, "Actions:", actions)

# Baseline: run a few episodes with a random policy for comparison.
episodes = 10
for episode in range(1, episodes + 1):
    state = env.reset()
    done = False
    score = 0
    while not done:
        # env.render()
        action = env.action_space.sample()
        state, reward, done, info = env.step(action)
        score += reward
    print("Episode:{} Score:{}".format(episode, score))

print("Building model")
model = build_model(states, actions)
model.summary()

dqn = build_agent(model, actions)
dqn.compile(Adam(learning_rate=1e-3), metrics=["mae"])
dqn.fit(env, nb_steps=2500, visualize=False, verbose=1)  # pass the wrapped env so the TimeLimit applies

scores = dqn.test(env, nb_episodes=10, visualize=False)
print("Mean test reward:", np.mean(scores.history["episode_reward"]))
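
# Optional: persist the trained policy. A minimal sketch using keras-rl's
# save_weights/load_weights; the filename "dqn_cartpole_weights.h5f" is an
# arbitrary choice, not part of the original script.
dqn.save_weights("dqn_cartpole_weights.h5f", overwrite=True)
# Later, rebuild the same model/agent and restore with:
# dqn.load_weights("dqn_cartpole_weights.h5f")

env.close()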