forked from pmuens/alphago
-
Notifications
You must be signed in to change notification settings - Fork 0
/
eval_pg_bot.py
61 lines (53 loc) · 1.74 KB
/
eval_pg_bot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import argparse
import h5py
from collections import namedtuple
from dlgo import agent
from dlgo import scoring
from dlgo.goboard_fast import GameState, Player
BOARD_SIZE = 19
class GameRecord(namedtuple('GameRecord', 'moves winner')):
    """Immutable record of one finished game.

    Fields:
        moves: the moves played, in the order they were selected.
        winner: the winner as reported by the game result.
    """

    # Without empty __slots__ every instance of the subclass would get its
    # own __dict__, losing the low memory footprint of a plain namedtuple.
    __slots__ = ()
def simulate_game(black_player, white_player):
    """Play one game between two agents and return its record.

    Starts a new game on a BOARD_SIZE board, repeatedly asks the agent
    whose turn it is for a move via ``select_move`` and applies it until
    the game reports that it is over. The computed game result is printed
    to stdout.

    Args:
        black_player: agent that selects moves for ``Player.black``.
        white_player: agent that selects moves for ``Player.white``.

    Returns:
        A ``GameRecord`` holding the moves in play order and the winner
        taken from the computed game result.
    """
    state = GameState.new_game(BOARD_SIZE)
    player_for = {Player.black: black_player, Player.white: white_player}
    history = []
    while True:
        if state.is_over():
            break
        # The side to move is looked up on the current state each turn.
        move = player_for[state.next_player].select_move(state)
        history.append(move)
        state = state.apply_move(move)
    result = scoring.compute_game_result(state)
    print(result)
    return GameRecord(moves=history, winner=result.winner)
def _load_agent(path):
    """Load a policy agent from the HDF5 file at ``path`` and close the file."""
    # Open read-only: evaluation never modifies the stored agent, and older
    # h5py releases default to append mode, which would silently create an
    # empty file when the path is mistyped.
    # NOTE(review): assumes load_policy_agent reads everything it needs
    # before returning, so the handle can be closed here -- confirm.
    with h5py.File(path, 'r') as h5file:
        return agent.load_policy_agent(h5file)


def main():
    """Pit two saved policy agents against each other and report the score.

    Command-line arguments:
        --agent1: path to the HDF5 file of the first agent (required).
        --agent2: path to the HDF5 file of the second agent (required).
        --num-games / -n: number of games to simulate (default 10).

    Agent 1 starts as black and the colors are swapped after every game.
    Any game not won by agent 1's color is counted as a loss. The final
    line printed is agent 1's wins out of the total games played.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--agent1', required=True)
    parser.add_argument('--agent2', required=True)
    parser.add_argument('--num-games', '-n', type=int, default=10)
    args = parser.parse_args()

    agent1 = _load_agent(args.agent1)
    agent2 = _load_agent(args.agent2)
    num_games = args.num_games

    wins = 0
    losses = 0
    # Color currently played by agent 1; flipped at the end of each game.
    color1 = Player.black
    for i in range(num_games):
        print('Simulating game %d/%d...' % (i + 1, num_games))
        if color1 == Player.black:
            black_player, white_player = agent1, agent2
        else:
            white_player, black_player = agent1, agent2
        game_record = simulate_game(black_player, white_player)
        if game_record.winner == color1:
            wins += 1
        else:
            losses += 1
        color1 = color1.other
    print('Agent 1 record: %d/%d' % (wins, wins + losses))
# Run the evaluation only when executed as a script, not when imported.
if __name__ == '__main__':
    main()