forked from google/ml-fairness-gym
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathattention_allocation.py
321 lines (263 loc) · 12.7 KB
/
attention_allocation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
# coding=utf-8
# Copyright 2020 The ML Fairness Gym Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Lint as: python2, python3
"""ML Fairness gym location-based attention allocation environment.
This environment is meant to be a general but simple location-based
attention allocation environment.
Situations that could be modeled by this environment are pest-control, or
allocation of social interventions like mobile STD testing clinics.
This is not a good environment to model predictive policing in part due to the
concerns raised in "Dirty Data, Bad Predictions: How Civil Rights Violations
Impact Police Data, Predictive Policing Systems, and Justice", Richardson et al.
(https://www.nyulawreview.org/wp-content/uploads/2019/04/NYULawReview-94-Richardson-Schultz-Crawford.pdf)
The environment contains k locations. At each timestep, some number of incidents
occur (such as rat infestations) in each location, and the agent has to allocate
some of its attention to each location. Each attention unit may then "see" or
discover some number of the incidents.
Incidents are drawn from a Poisson distribution whose mean is the location's
underlying incident rate.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import copy
from typing import List, Optional, Tuple
import attr
import core
from spaces import multinomial
from gym import spaces
import numpy as np
from six.moves import range
@attr.s
class Params(core.Params):
  """Configuration for the location-based attention allocation environment.

  The per-location sequences (`prior_incident_counts`, `incident_rates`,
  `miss_incident_prob`, `extra_incident_prob`) are indexed by location.
  """

  # How many locations the environment models.
  n_locations = attr.ib(default=2)  # type: int

  # Incident counts per location that serve as a prior.
  prior_incident_counts = attr.ib(default=(650, 250))  # type: Tuple[int, ...]

  # Underlying incident rate of each location. Built by a factory so that
  # every Params instance owns its own list.
  incident_rates = attr.ib(
      default=attr.Factory(lambda: [4., 3.]))  # type: List[float]

  # Weight applied to the incident rate for discovered incidents, i.e. the
  # incidents that can only be found by allocating attention to a location.
  discovered_incident_weight = attr.ib(default=1.0)  # type: float

  # Weight applied to the incident rate for reported incidents, i.e. the
  # incidents observed from a location without any attention allocated there.
  reported_incident_weight = attr.ib(default=0.0)  # type: float

  # Total number of attention units the agent can allocate.
  n_attention_units = attr.ib(default=1)  # type: int

  # True: a unit of attention may discover more than one incident.
  # False: a unit of attention discovers at most one incident.
  attention_replacement = attr.ib(default=False)  # type: bool

  # Per-location probability that an attention unit misses an incident.
  miss_incident_prob = attr.ib(default=(0.2, 0.2))  # type: Tuple[float, ...]

  # Per-location probability that an incident is falsely reported without
  # having been discovered.
  extra_incident_prob = attr.ib(default=(0.0, 0.0))  # type: Tuple[float, ...]

  # How strongly incident_rates react to the allocation of attention units.
  dynamic_rate = attr.ib(default=0.0)  # type: float

  # Location feature parameters: means and covariances of the multivariate
  # gaussian from which the features are drawn.
  feature_means = attr.ib(
      default=attr.Factory(lambda: [1., 1.]))  # type: List[float]
  feature_covariances = attr.ib(
      default=attr.Factory(
          lambda: [[0.8, 0.0], [0.0, 0.7]]))  # type: List[List[float]]

  # Coefficients controlling the correlation between the features and the
  # underlying incident rates.
  feature_coefficients = attr.ib(default=(0, 1))  # type: Tuple[float, ...]
@attr.s(cmp=False)
class State(core.State):
  """Mutable state of the attention allocation environment.

  All per-location arrays are indexed by location.
  """

  # Environment parameters in effect for this state.
  params = attr.ib()  # type: Params

  # Integer ndarray: incidents seen at each location.
  incidents_seen = attr.ib()  # type: np.ndarray

  # Integer ndarray: incidents reported for each location.
  incidents_reported = attr.ib()  # type: np.ndarray

  # Integer ndarray: incidents that occurred at each location.
  incidents_occurred = attr.ib()  # type: np.ndarray

  # Float ndarray: features of each location.
  location_features = attr.ib()  # type: np.ndarray

  # Random number generator; a fresh RandomState is created when none is given.
  rng = attr.ib(
      default=attr.Factory(
          np.random.RandomState))  # type: np.random.RandomState
def _sample_incidents(rng, params):
  """Samples the incidents that occur and are reported at every location.

  For each of the first `params.n_locations` locations, two Poisson draws are
  made in a single `rng.poisson` call: one with mean
  `incident_rate * discovered_incident_weight` and one with mean
  `incident_rate * reported_incident_weight`.

  Args:
    rng: A numpy RandomState() object acting as a random number generator.
    params: A Params instance for this environment.

  Returns:
    incidents_occurred: a 1-D array with, per location, the number of incidents
      that occurred and could be discovered by attention.
    reported_incidents: a 1-D array with, per location, the number of incidents
      that were reported directly.
  """
  weights = (params.discovered_incident_weight,
             params.reported_incident_weight)
  draws = []
  for location in range(params.n_locations):
    rate = params.incident_rates[location]
    # One call per location keeps the order in which the generator is consumed:
    # (occurred, reported) for location 0, then location 1, and so on.
    draws.append(rng.poisson([rate * weight for weight in weights]))
  # Column 0 holds the discoverable incidents, column 1 the reported ones.
  occurred_column, reported_column = np.hsplit(np.asarray(draws), 2)
  return occurred_column.flatten(), reported_column.flatten()
def _get_location_features(params, rng, incidents_occurred):
  """Draws a feature vector for every location.

  The configured feature means are shifted per location by the number of
  incidents that occurred there, scaled by `params.feature_coefficients`.
  Zero-mean multivariate gaussian noise with covariance
  `params.feature_covariances` is then added on top.

  Args:
    params: A Params instance for this environment.
    rng: A numpy RandomState() object acting as a random number generator.
    incidents_occurred: A list of integers with the number of incidents that
      occurred at each location.

  Returns:
    A numpy array of n_locations by number of features.
  """
  # Row i is incidents_occurred[i] * feature_coefficients, i.e. how far the
  # feature means of location i move at this step.
  incident_shift = np.outer(incidents_occurred, params.feature_coefficients)
  per_location_means = params.feature_means + incident_shift

  zero_mean = np.zeros_like(params.feature_means)
  noise = rng.multivariate_normal(
      zero_mean, params.feature_covariances, size=params.n_locations)
  return per_location_means + noise
def _update_state(state, incidents_occurred, incidents_reported, action):
  """Applies the agent's action to the state, in place.

  Simulates which of the occurred incidents the allocated attention gets to
  see, adjusts the incident rates stored in `state.params` according to the
  allocation, and stores the new features and incident counts on `state`.
  Nothing is returned.

  Args:
    state: a 'State' object with the state to be updated.
    incidents_occurred: a vector of length equal to n_locations in state.param
      that contains integer counts of incidents that occurred for each location.
    incidents_reported: a vector of length equal to n_locations in state.param
      that contains integer counts of incidents that are reported for each
      location.
    action: an action in the action space of LocationAllocationEnv that is a
      vector of integer counts of attention allocated to each location.
  """
  params = state.params
  rng = state.rng
  locations = range(params.n_locations)

  if params.attention_replacement:
    # With replacement each incident is seen unless every allocated unit
    # misses it, so the seen count is a binomial draw per location.
    hit_probability = 1 - (np.power(params.miss_incident_prob, action))
    seen_counts = []
    for loc in locations:
      seen_counts.append(
          rng.binomial(incidents_occurred[loc], hit_probability[loc]))
  else:
    # Without replacement a unit is used up by the incident it discovers, so
    # every unit accounts for at most one incident.
    seen_counts = [0] * params.n_locations
    for loc in locations:
      remaining_units = action[loc]
      miss_probability = params.miss_incident_prob[loc]
      # Decide for one incident at a time whether the units still available
      # discover it.
      for _ in range(incidents_occurred[loc]):
        caught = rng.binomial(
            1, 1 - (np.power(miss_probability, remaining_units)))
        remaining_units -= caught
        seen_counts[loc] += caught
        if remaining_units <= 0:
          # No attention is left, the remaining incidents cannot be seen.
          break
      # Each unit that discovered nothing may still produce a false incident.
      for _ in range(remaining_units):
        seen_counts[loc] += rng.binomial(1, params.extra_incident_prob[loc])

  # Dynamics: a location without attention gets a higher incident rate, a
  # location with attention gets a lower one that is floored at zero.
  for loc in locations:
    units = action[loc]
    if units == 0:
      params.incident_rates[loc] += params.dynamic_rate
    else:
      params.incident_rates[loc] = max(
          0.0, params.incident_rates[loc] - (params.dynamic_rate * units))

  state.location_features = _get_location_features(params, rng,
                                                   incidents_occurred)
  state.incidents_occurred = np.asarray(incidents_occurred)
  state.incidents_seen = np.asarray(seen_counts)
  state.incidents_reported = np.asarray(incidents_reported)
class LocationAllocationEnv(core.FairnessEnv):
  """Environment in which an agent spreads attention over locations.

  On every step the agent decides how much attention each location receives.
  The environment samples the incidents that occur from Poisson distributions
  over the per-location incident rates and then simulates how many of those
  incidents the allocated attention gets to see.
  """

  def __init__(self, params=None):
    """Builds the action space, the observation spaces and the first state.

    Args:
      params: A Params instance, or None to use the default Params().
    """
    if params is None:
      params = Params()

    n_locations = params.n_locations
    self.action_space = multinomial.Multinomial(n_locations,
                                                params.n_attention_units)

    # The per-location sequences must have one entry per location.
    assert (len(params.prior_incident_counts) == n_locations and
            len(params.incident_rates) == n_locations)

    # Observation space. Incidents seen are MultiDiscrete because they need
    # not sum to n_attention_units. MultiDiscrete uses dtype=np.int32.
    int32_max = np.iinfo(np.int32).max
    if params.attention_replacement:
      # With replacement the number of attention units does not bound the
      # number of incidents seen.
      seen_bounds = [int32_max] * n_locations
    else:
      seen_bounds = [params.n_attention_units + 1] * n_locations
    incidents_seen_space = spaces.MultiDiscrete(seen_bounds)
    incidents_reported_space = spaces.MultiDiscrete([int32_max] * n_locations)

    feature_shape = (n_locations, len(params.feature_means))
    location_features_space = spaces.Box(
        low=-np.inf, high=np.inf, shape=feature_shape, dtype=np.float32)

    # The first observation from this state is not necessarily contained by
    # this observation space. It conveys a prior of the initial incident
    # counts.
    self.observable_state_vars = {
        'incidents_seen': incidents_seen_space,
        'incidents_reported': incidents_reported_space,
        'location_features': location_features_space
    }

    super(LocationAllocationEnv, self).__init__(params)
    self._state_init()

  def _state_init(self, rng=None):
    """Creates a fresh, zero-filled State from `self.initial_params`.

    Args:
      rng: Random number generator to store on the new state. A new
        np.random.RandomState() is created when it is None (or falsy).
    """
    initial = self.initial_params
    n_locations = initial.n_locations
    n_features = len(initial.feature_means)
    if not rng:
      rng = np.random.RandomState()

    self.state = State(
        rng=rng,
        # Deep copy so that changes to the state's params (such as the
        # incident rate dynamics) never touch the initial params.
        params=copy.deepcopy(initial),
        incidents_seen=np.zeros(n_locations, dtype='int64'),
        incidents_reported=np.zeros(n_locations, dtype='int64'),
        incidents_occurred=np.zeros(n_locations, dtype='int64'),
        location_features=np.zeros((n_locations, n_features)))

  def reset(self):
    """Resets the environment, keeping the current random number generator."""
    current_rng = self.state.rng
    self._state_init(current_rng)
    return super(LocationAllocationEnv, self).reset()

  def _is_done(self):
    """Always returns False: this environment has no terminal condition."""
    return False

  def _step_impl(self, state, action):
    """Runs one timestep of the environment's dynamics.

    New incidents are sampled with the state's random number generator and
    params, then the state is updated in place with the incidents seen under
    the given attention allocation.

    Args:
      state: A 'State' object containing the current state.
      action: An action in 'action space'.

    Returns:
      The same 'State' object, updated.
    """
    occurred, reported = _sample_incidents(state.rng, state.params)
    _update_state(state, occurred, reported, action)
    return state