-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhelper.py
135 lines (118 loc) · 3.79 KB
/
helper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# Author/s: Yee Chuen Teoh (authors who contributed to the script)
# Title: helper.py (the name of the script)
# Project: OTHELLO-RL (the main project name; which project is this script a part of?)
# Description: helper methods/functions for 673 RL project (summary of what the script does)
# Reference/Directions:
'''
Usage:
python helper.py
'''
# Updates: (4/4/2023)
'''
4/4/2023
- import numpy
- addition of two helper functions pos_to_index and index_to_pos
- creation of the script
'''
#____________________________________________________________________________________________________
# imports
import numpy as np
import scipy
from scipy.spatial.distance import hamming
import os
#____________________________________________________________________________________________________
# functions/set ups
def pos_to_index(i,j):
    '''
    Flatten a 2-dim board coordinate into its 1-dim index (row-major, 8 per row).

    input:
        @param i,j --> row and column of the cell, each expected in 0..7
    output:
        @return n --> 8*i+j, or -1 (after printing a message) when either
                      coordinate falls outside 0..7
    '''
    out_of_bounds = i < 0 or i > 7 or j < 0 or j > 7
    if out_of_bounds:
        # Invalid input is reported on stdout and signalled with a -1 sentinel.
        print("invalid coordiate for function pos_to_index")
        return -1
    return 8 * i + j
def index_to_pos(n):
    '''
    Expand a 1-dim index back into its 2-dim board coordinate (8 cells per row).

    input:
        @param n --> 1-dim location of a cell, expected in 0..63
    output:
        @return (i,j) --> tuple of size 2: index 0 = row i, index 1 = column j;
                          -1 (after printing a message) when n is outside 0..63
    '''
    out_of_range = n < 0 or n > 63
    if out_of_range:
        # Invalid input is reported on stdout and signalled with a -1 sentinel.
        print("invalid coordiate for function index_to_pos")
        return -1
    row, col = divmod(n, 8)
    # The row is coerced to a plain int; the column keeps the type n % 8 yields.
    return (int(row), col)
def hamming_distance(array_1, array_2):
    """Computes the hamming distance of two arrays.

    Thin wrapper that forwards both arguments, unchanged, to
    scipy.spatial.distance.hamming.

    Args:
        array_1 (np.array)
        array_2 (np.array)

    Returns:
        Whatever scipy's ``hamming`` returns for the pair. NOTE(review): per
        the SciPy documentation this is the *fraction* of positions that
        differ (a float in [0, 1]), not an integer count -- confirm callers
        expect that.
    """
    return hamming(array_1, array_2)
def rewards(array_1, array_2):
    '''
    Reward signal for a pair of arrays; currently just their hamming distance.

    input:
        @param array_1 -> numpy array 1
        @param array_2 -> numpy array 2
    output:
        @return reward -> the value hamming_distance(array_1, array_2) returns
    '''
    reward = hamming_distance(array_1, array_2)
    return reward
def invert_board(board):
    '''
    Swap the two players on a board by negating every cell.

    input:
        @param board -> othello board (iterable of rows) where
                        white = -1, black = 1, empty = 0
    output:
        @return new_board -> new list of lists with every cell multiplied
                             by -1; the input board is not modified
    '''
    return [[cell * -1 for cell in row] for row in board]
def eps_greedy(best_move, legal_moves, EPSILON):
    """Epsilon-greedy move selection.

    Draws one Bernoulli sample with success probability EPSILON. On success a
    move is drawn from ``legal_moves`` with ``np.random.choice``; otherwise
    ``best_move`` is returned unchanged.

    Args:
        best_move: move to return when not exploring (probability 1-EPSILON).
        legal_moves: candidates handed to ``np.random.choice``.
            NOTE(review): per the NumPy docs that call needs a 1-D sequence,
            so moves are presumably flat indices rather than (i, j) pairs --
            confirm against callers.
        EPSILON (float): exploration probability passed to
            ``np.random.binomial``.

    Returns:
        Either ``best_move`` or an element drawn from ``legal_moves``.
    """
    explore = np.random.binomial(1, EPSILON) == 1
    if not explore:
        return best_move
    return np.random.choice(legal_moves)
def get_latest_iter(agent_type, trainer_type, save_dir):
    """Finds the iteration number of the latest trained agent.

    Lists ``{save_dir}/models/{agent_type}/{trainer_type}/`` and reads the
    iteration from each entry name: the text before the first ``.`` is split
    on ``_`` and its last piece is parsed as an integer (e.g. ``agent_200.pkl``
    gives 200).

    Args:
        agent_type (str): sarsaagent or qagent
        trainer_type (str): rand or heu
        save_dir (str): path to checkpointing location

    Returns:
        int: the largest iteration found, or 0 when the directory holds no
        entry with a parsable iteration.

    Raises:
        OSError: propagated from ``os.listdir`` (e.g. FileNotFoundError when
            the checkpoint directory does not exist).
    """
    checkpoint_dir = f"{save_dir}/models/{agent_type}/{trainer_type}/"
    iters = []
    for entry in os.listdir(checkpoint_dir):
        suffix = entry.split(".")[0].split("_")[-1]
        try:
            iters.append(int(suffix))
        except ValueError:
            # Stray entries (.gitkeep, notes.txt, ...) carry no iteration
            # number; skip them instead of aborting the whole lookup.
            continue
    # default=0 covers the "nothing trained yet" case without a bare except.
    return max(iters, default=0)
def main():
    '''
    Script entry point; a placeholder that currently does nothing.

    output:
        @return None
    '''
    return None
#____________________________________________________________________________________________________
# main
if __name__ == '__main__':
    # Runs only when the file is executed as a script: main() is wrapped in
    # START/END banner lines printed to stdout.
    # TODO: change your python script title
    print('\n-------------------- START of "<helper.py>" script --------------------')
    main()
    print('-------------------- END of "<helper.py>" script --------------------\n')