-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathagents.py
146 lines (123 loc) · 5.05 KB
/
agents.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
from dataclasses import dataclass, field
import math
import random
import string
from typing import List, Tuple
import numpy as np
import pandas as pd
from base_logger import logger
from interactibles import Item
# Name of the action that Agent.get_legal_actions always offers first,
# without any MP check.
FALLBACK_ATTACK_NAME = 'basic'
# Amount added to the reward in Agent.execute each time a party member is killed.
PARTY_DEATH_PENALTY = 0
# NOTE(review): not referenced in the visible code; presumably the length of
# generated identifiers -- confirm against the rest of the project.
ID_LENGTH = 8
@dataclass
class Agent:
    """A combatant that can execute actions against other agents.

    ``special_moves`` may be supplied as a list of move names, as a single
    whitespace-separated string, or as ``None``; it is always normalised to
    a list.  ``max_hp``, ``max_mp`` and ``id`` are derived in
    ``__post_init__`` and are not constructor arguments.
    """

    name: str
    position: int
    hp: int
    mp: int
    attack: int
    defense: int = 0
    # NOTE(review): annotated as (status_name, time_applied) tuples, but
    # apply_status_effects calls check_needed()/tick()/can_escape() on the
    # entries, so they are expected to be status objects -- confirm.
    statuses: List[Tuple[str, int]] = field(default_factory=list)  # list of (status_name, time_applied)
    status_resist: float = 0
    special_moves: List[str] = field(default_factory=list)
    has_gone: bool = False
    crit_p: float = 0.05

    def __post_init__(self):
        """Record the starting HP/MP as maxima, normalise moves, build the id."""
        self.max_hp = self.hp
        self.max_mp = self.mp

        moves = self.special_moves
        if moves is None:
            self.special_moves = []
        elif isinstance(moves, str):
            # str.split() without arguments already ignores leading/trailing
            # whitespace and collapses runs of whitespace.
            self.special_moves = moves.split()
        else:
            # Already a collection of names (including the default empty
            # list); copy it so the agent owns its own list.
            self.special_moves = list(moves)

        self.id = f"{self.name}@{self.position}"

    def short_repr(self):
        """Return ``"PARTY <name> (id: <id>)"`` for allies, ``"ENEMY ..."`` otherwise."""
        side = "PARTY" if isinstance(self, Ally) else "ENEMY"
        return f"{side} {self.name} (id: {self.id})"

    def effect_scale(self, target_field: str, action, action_field: str):
        """Return the effective magnitude of one of ``action``'s values.

        When ``action.effect_scaling`` is ``'proportional'`` the action's
        value is multiplied by this agent's ``target_field`` attribute;
        otherwise the action's value is returned unchanged.
        """
        amount = getattr(action, action_field)
        if action.effect_scaling == 'proportional':
            return getattr(self, target_field) * amount
        return amount

    def execute(self, state, action, target_list):
        """Apply ``action`` from this agent to every agent in ``target_list``.

        Pays the action's costs from this agent, applies its deltas to each
        target, removes targets whose HP reaches 0 from ``state.party`` or
        ``state.enemies`` (crediting gold for enemies), ticks status effects
        and advances the turn counters on ``state``.

        Returns:
            A ``(state, reward)`` tuple; ``reward`` accumulates
            ``PARTY_DEATH_PENALTY`` per killed ally and ``gold`` per killed
            enemy.
        """
        target_ids = [target.id for target in target_list]
        logger.debug(f"AGENT {self.id} --({action.name})-> AGENT {target_ids}\nParameters:{action.pretty_repr()}")
        reward = 0

        # All effective magnitudes are computed from the pre-action stats,
        # before any cost is deducted.
        effective_hp_cost = self.effect_scale('hp', action, 'hp_cost')
        effective_mp_cost = self.effect_scale('mp', action, 'mp_cost')
        effective_attack_cost = self.effect_scale('attack', action, 'attack_cost')
        effective_defense_cost = self.effect_scale('defense', action, 'defense_cost')
        effective_hp_delta = self.effect_scale('attack', action, 'hp_delta')
        # TODO: effective_mp/atk/def_delta seems very niche, probably not going to implement now

        self.hp = np.clip(self.hp - effective_hp_cost, 0, self.max_hp)
        self.mp = np.clip(self.mp - effective_mp_cost, 0, self.max_mp)
        self.attack = max(self.attack - effective_attack_cost, 0)
        self.defense = max(self.defense - effective_defense_cost, 0)
        if random.random() < action.status_self_p:
            # NOTE(review): this writes ``status`` while the rest of the
            # class works with the ``statuses`` list -- confirm intent.
            self.status = action.status_self

        # Iterate over a snapshot: dead targets are removed from
        # state.party / state.enemies below, and the caller may have passed
        # one of those very lists as target_list.
        for target in list(target_list):
            target.hp = np.clip(target.hp + effective_hp_delta, 0, target.max_hp)
            if target.hp == 0:
                info_str = target.short_repr()
                if isinstance(target, Ally):
                    logger.info(f"KILL: {info_str} (-{-PARTY_DEATH_PENALTY})")
                    state.party.remove(target)
                    reward += PARTY_DEATH_PENALTY
                else:  # Enemy
                    logger.info(f"KILL: {info_str} (+{target.gold})")
                    state.enemies.remove(target)
                    state.gold += target.gold
                    reward += target.gold
                # A dead target receives none of the remaining effects.
                continue
            target.mp = np.clip(target.mp + action.mp_delta, 0, target.max_mp)
            target.attack = max(target.attack + action.attack_delta, 0)
            target.defense = max(target.defense + action.defense_delta, 0)
            if random.random() < action.status_target_p:
                # NOTE(review): same ``status`` vs ``statuses`` question as above.
                target.status = action.status_target

        state = self.apply_status_effects(state)
        self.has_gone = True
        state.turns_left -= 1
        state.global_step += 1
        return state, reward

    def apply_status_effects(self, state):
        """Tick every due status on every agent and drop the ones that end.

        A status is processed only when ``status.check_needed`` returns true
        for ``state.global_step``; it is removed when ``status.can_escape``
        returns true after ticking.  Returns ``state``.
        """
        for agent in state.party + state.enemies:
            # Snapshot the list so removing a status does not make the loop
            # skip the entry that follows it.
            for status in list(agent.statuses):
                if status.check_needed(state.global_step):
                    status.tick()
                    if status.can_escape():
                        agent.statuses.remove(status)
        return state

    def get_candidate_moves(self):
        """Return the move names this agent may attempt besides the fallback."""
        return self.special_moves

    def get_legal_actions(self, entity_bank):
        """Return the fallback attack plus every candidate move the agent can afford.

        A move is affordable when the agent's current ``mp`` is at least the
        ``mp_cost`` stored for that move in ``entity_bank.action_data``.
        """
        action_names = [FALLBACK_ATTACK_NAME]
        for move_name in self.get_candidate_moves():
            if self.mp >= entity_bank.action_data.loc[move_name].get('mp_cost'):
                action_names.append(move_name)
        info_str = self.short_repr()
        logger.debug(f"{info_str} ACTIONS: {action_names}")
        return action_names

    def __eq__(self, other):
        """Compare by identity, so ``list.remove`` drops exactly this instance."""
        return self is other
# eq=False: a plain @dataclass would generate a field-by-field __eq__ for this
# subclass and shadow the identity-based Agent.__eq__, so list.remove() could
# drop a different ally that merely has equal field values.
@dataclass(eq=False)
class Ally(Agent):
    """A party member: an Agent that can additionally carry equipped items."""

    weapon: Item = None
    armor: Item = None
    items: List[Item] = field(default_factory=list)

    def equip(self, item: Item):
        """Add ``item``'s bonuses to this ally's stats and record the item.

        HP and MP bonuses raise both the current value and the maximum.
        """
        self.attack += item.attack_bonus
        self.hp += item.hp_bonus
        self.mp += item.mp_bonus
        self.max_hp += item.hp_bonus
        self.max_mp += item.mp_bonus
        self.crit_p += item.crit_bonus
        self.items.append(item)

    def get_candidate_moves(self):
        """Return the special moves plus the move of every item that has one."""
        item_moves = [item.move for item in self.items if not pd.isnull(item.move)]
        return self.special_moves + item_moves
# eq=False: a plain @dataclass would generate a field-by-field __eq__ for this
# subclass and shadow the identity-based Agent.__eq__, so list.remove() could
# drop a different enemy that merely has equal field values.
@dataclass(eq=False)
class Enemy(Agent):
    """An opposing Agent; ``gold`` is credited to the state when it is killed."""

    # NOTE(review): min_level/max_level are not read in the visible code;
    # presumably the level range in which this enemy may appear -- confirm.
    min_level: int = 0
    max_level: int = 0
    gold: int = 1