-
Notifications
You must be signed in to change notification settings - Fork 2
/
polling.py
138 lines (118 loc) · 4.69 KB
/
polling.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import numpy as np
import tkinter as tk
from tkinter import messagebox
# Model configuration {
N_LINES = 5             # number of lines drawn and polled
MAX_Q = 10              # observations are clipped below at -MAX_Q
MAX_I = 10              # observations are clipped above at +MAX_I
UTILIZATION = 0.6       # arrival rate / service rate
C_HOLDING = 1           # cost per unit of positive inventory
C_BACKLOGGING = 2       # cost per unit of negative inventory (backlog)
K_II = 1                # fixed cost: same non-null action as the previous step
K_IJ = 20               # fixed cost: different action, previous one was non-null
K_0I = 100              # fixed cost: non-null action after a null action
BOUNDARY_PENALTY = 5000  # subtracted from the reward when a line reaches a boundary
LONGTERM_BONUS = 0      # added to the reward every 10th step
MAX_STEPS = 100         # step count at which a non-play-mode episode is ended
# }

# Derived drawing constants -- don't change the configuration below {
UNIT = 30               # canvas pixels per inventory unit
UPPER_BOUND, LOWER_BOUND = MAX_Q, MAX_I
BOUND = UPPER_BOUND + LOWER_BOUND
# }
class Polling(tk.Tk):
    """Tk window simulating a polling system with ``N_LINES`` inventory lines.

    Each line is drawn as a red bar on a canvas; its vertical position encodes
    the line's level (0 at the dashed grey line, negative above it, positive
    below it).  ``step`` applies random arrivals, serves the chosen line and
    returns ``(observation, reward, end)``.

    Setting ``play_mode`` to a true value binds the keyboard so that digit
    keys drive ``step``; setting it to a false value removes that binding.
    """

    # Class-level defaults so the ``play_mode`` property can be read or set
    # before ``__init__`` has assigned anything on the instance.
    _play_mode = False
    _key_binding = None

    def __init__(self):
        """Create the window, the canvas and the initial (all-zero) lines."""
        np.random.seed(1)
        super().__init__()

        # Column vector 0..N_LINES; 0 means null action.
        self.action_space = np.arange(N_LINES + 1).reshape(N_LINES + 1, 1)
        # Every combination of per-line levels in [-MAX_Q, MAX_I], one row per
        # state.  Stacking raveled meshgrids avoids materialising a Python
        # list of tuples while keeping the same row order and dtype.
        level_axes = [range(-MAX_Q, MAX_I + 1)] * N_LINES
        self.state_space = np.stack(
            [grid.ravel() for grid in np.meshgrid(*level_axes)], axis=1
        )

        self.geometry('{}x{}'.format(N_LINES * UNIT, BOUND * UNIT + 55))
        self.score = 0
        self.play_mode = False
        self.title('polling')
        self.last_action = 0

        # Header row: one numbered label per line.
        for i in range(N_LINES):
            self.columnconfigure(i, weight=1)
        for i in range(N_LINES):
            header = tk.Label(self, text=str(i + 1), fg='white', bg='black')
            header.grid(row=0, column=i, sticky='WE')

        self.canvas = tk.Canvas(self, bg='white',
                                height=BOUND * UNIT + 5,
                                width=N_LINES * UNIT)
        # Vertical separators between lines.
        for i in range(N_LINES):
            self.canvas.create_line(i * UNIT, 0, i * UNIT, BOUND * UNIT + 5)
        # Dashed reference line at level zero.
        self.canvas.create_line(0, UPPER_BOUND * UNIT + 5,
                                N_LINES * UNIT, UPPER_BOUND * UNIT + 5,
                                fill='grey', dash=(2, 4))
        self.init_lines()
        self.canvas.grid(row=1, columnspan=N_LINES)

        self.var = tk.StringVar()
        self.var.set('Score: {:.1f}'.format(self.score))
        self.score_board = tk.Label(self, textvariable=self.var,
                                    fg='white', bg='black')
        self.score_board.grid(row=2, columnspan=N_LINES, sticky='WE')

        self.steps = 0
        self.max_steps = MAX_STEPS

    @property
    def play_mode(self):
        """Whether the environment is driven interactively from the keyboard."""
        return self._play_mode

    @play_mode.setter
    def play_mode(self, value):
        # Keep the '<Key>' binding in sync with the flag, so that assigning
        # ``env.play_mode = True`` after construction actually enables keys.
        self._play_mode = value
        if value and self._key_binding is None:
            self._key_binding = self.bind('<Key>', self.step)
        elif not value and self._key_binding is not None:
            self.unbind('<Key>', self._key_binding)
            self._key_binding = None

    def init_lines(self):
        """Draw one red bar per line at level zero and store the canvas ids."""
        zero_y = UPPER_BOUND * UNIT + 5
        self.lines = [
            self.canvas.create_line(i * UNIT, zero_y, (i + 1) * UNIT, zero_y,
                                    fill='red', width=5)
            for i in range(N_LINES)
        ]

    def reset(self):
        """Start a new episode and return the all-zero observation."""
        self.update()
        for line in self.lines:
            self.canvas.delete(line)
        self.init_lines()
        self.score = 0
        self.steps = 0
        # Without this the first fixed cost of the new episode would depend on
        # the final action of the previous one.
        self.last_action = 0
        self.var.set('Score: {:.1f}'.format(self.score))
        observation = np.zeros(N_LINES)
        return observation

    def end_game(self, y):
        """In play mode, close the window once any line has hit a boundary.

        ``y`` holds the lines' canvas offsets in pixels (0 .. BOUND * UNIT).
        Returns True if the game was ended, False otherwise; always False
        outside play mode.
        """
        if self.play_mode and (min(y) <= 0 or max(y) >= BOUND * UNIT):
            messagebox.showinfo('polling', 'Game Over')
            self.destroy()
            return True
        return False

    def c(self, y):
        """Non-fixed cost of inventory vector ``y``.

        Positive entries are charged ``C_HOLDING`` each (holding), negative
        entries ``C_BACKLOGGING`` per unit of magnitude (backlogging).
        """
        backlog = np.maximum(-y, 0).sum()
        holding = np.maximum(y, 0).sum()
        return C_BACKLOGGING * backlog + C_HOLDING * holding

    def k(self, action, last_action):
        """Fixed cost of taking ``action`` after ``last_action``.

        The null action (0) is free; repeating the previous action costs
        ``K_II``; changing from another non-null action costs ``K_IJ``;
        acting after a null action costs ``K_0I``.
        """
        if action == 0:
            return 0
        if action == last_action:
            return K_II
        return K_IJ if last_action else K_0I

    def _line_offsets(self):
        """Return each line's canvas y offset (pixels, top margin removed)."""
        return np.array([self.canvas.coords(line)[1] - 5 for line in self.lines])

    def _action_from_key(self, event):
        """Translate a key event into an action, or None for an unusable key."""
        try:
            action = int(event.keysym)
        except (TypeError, ValueError):
            return None  # non-digit key such as Shift or a letter
        if 0 <= action <= N_LINES:
            return action
        return None  # digit that does not name a line

    def step(self, event):
        """Advance the simulation by one period.

        ``event`` is the action itself (0 = null, i = serve line i) outside
        play mode, or a Tk key event whose ``keysym`` is the action digit in
        play mode.

        Returns ``(observation, reward, end)``.  In play mode returns None
        when the game has just been ended or the pressed key is not a valid
        action (such keys are ignored and change no state).
        """
        # check if the game is ended
        if self.end_game(self._line_offsets()):
            return None

        # Resolve the action before touching any state so an unusable key
        # cannot leave a half-applied step behind.
        if self.play_mode:
            action = self._action_from_key(event)
            if action is None:
                return None
        else:
            action = event

        # arrival: poisson a, deterministic s; a line never moves above the
        # top of the canvas.
        for line in self.lines:
            arrivals = int(np.random.poisson(UTILIZATION / N_LINES)) * UNIT
            dy = min(arrivals, self.canvas.coords(line)[1] - 5)
            self.canvas.move(line, 0, -dy)

        # service
        if action:
            self.canvas.move(self.lines[action - 1], 0, UNIT)

        y_shifted = self._line_offsets() // UNIT - UPPER_BOUND
        reward = -self.c(y_shifted) - self.k(action, self.last_action)
        end = bool(np.isin(y_shifted, [-UPPER_BOUND, LOWER_BOUND]).any())
        if end:
            reward -= BOUNDARY_PENALTY
        if self.steps > 0 and self.steps % 10 == 0:
            reward += LONGTERM_BONUS

        # return observation
        observation = np.clip(y_shifted, -MAX_Q, MAX_I)
        self.score += reward
        if self.steps == self.max_steps and not self.play_mode:
            end = True
        self.var.set('Score: {:.1f}'.format(self.score))
        self.last_action = action
        self.steps += 1
        return observation, reward, end

    def render(self):
        """Process pending Tk events so the window is redrawn."""
        self.update()

    def run(self, f):
        """Schedule ``f`` to be called after 1 ms, then enter the Tk main loop."""
        self.after(1, f)
        self.mainloop()
if __name__ == '__main__':
    # Script entry point: open the window with play mode switched on and
    # hand control to the Tk event loop.
    game = Polling()
    game.play_mode = True
    game.mainloop()