-
Notifications
You must be signed in to change notification settings - Fork 0
/
env_util.py
116 lines (91 loc) · 3.25 KB
/
env_util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import os
from collections import OrderedDict
from numbers import Number
import numpy as np
from gym.spaces import Box
# Path of the 'assets' directory that sits next to this module file.
ENV_ASSET_DIR = os.path.join(os.path.dirname(__file__), 'assets')
def create_stats_ordered_dict(
        name,
        data,
        stat_prefix=None,
        always_show_all_stats=True,
        exclude_max_min=False,
):
    """
    Summarize ``data`` as an OrderedDict of labelled statistics.

    :param name: Base label used to build every key.
    :param data: A plain number, a tuple, a list, or a numpy array.
        A tuple is summarized element by element under the labels
        ``"<name>_<index>"``. A list whose first element is iterable is
        flattened with ``np.concatenate`` before the statistics are taken.
    :param stat_prefix: When not None, the label becomes
        ``"<stat_prefix> <name>"``. An empty string still counts as a
        prefix, so the label then starts with a space.
    :param always_show_all_stats: When False, a numpy array holding exactly
        one element is reported as a single ``{label: value}`` entry instead
        of Mean/Std/Max/Min.
    :param exclude_max_min: When True, the ``Max`` and ``Min`` entries are
        left out.
    :return: OrderedDict mapping labels to values. Empty when ``data`` has
        length zero.
    """
    if stat_prefix is not None:
        name = "{} {}".format(stat_prefix, name)

    # A bare number has no distribution to describe; report it directly.
    if isinstance(data, Number):
        return OrderedDict({name: data})

    if len(data) == 0:
        return OrderedDict()

    if isinstance(data, tuple):
        ordered_dict = OrderedDict()
        for number, d in enumerate(data):
            # ``name`` already carries the prefix, so it is not passed again.
            # The display flags are forwarded so that they apply to every
            # element of the tuple and not only to non-tuple input.
            sub_dict = create_stats_ordered_dict(
                "{0}_{1}".format(name, number),
                d,
                always_show_all_stats=always_show_all_stats,
                exclude_max_min=exclude_max_min,
            )
            ordered_dict.update(sub_dict)
        return ordered_dict

    if isinstance(data, list):
        # A list of iterables is flattened into one array; a list of
        # scalars is handed to numpy unchanged.
        try:
            iter(data[0])
        except TypeError:
            pass
        else:
            data = np.concatenate(data)

    if (isinstance(data, np.ndarray) and data.size == 1
            and not always_show_all_stats):
        # ``.item()`` extracts the lone element regardless of the array's
        # number of dimensions; calling float() on the array itself is
        # deprecated for arrays with ndim > 0.
        return OrderedDict({name: float(data.item())})

    stats = OrderedDict([
        (name + ' Mean', np.mean(data)),
        (name + ' Std', np.std(data)),
    ])
    if not exclude_max_min:
        stats[name + ' Max'] = np.max(data)
        stats[name + ' Min'] = np.min(data)
    return stats
def get_generic_path_information(paths, stat_prefix=''):
    """
    Get an OrderedDict with a bunch of statistic names and values.

    :param paths: Sized collection of paths. Each path is indexed with the
        keys ``"rewards"`` and ``"actions"``; the ``"actions"`` entry must
        expose a ``shape`` attribute.
    :param stat_prefix: Prefix forwarded to ``create_stats_ordered_dict``
        for the Rewards, Returns and Actions statistics.
    :return: OrderedDict holding the Rewards, Returns and Actions statistics
        followed by ``'Num Paths'``. When ``paths`` is empty only
        ``'Num Paths'`` (equal to 0) is present.
    """
    statistics = OrderedDict()

    # Stacking an empty sequence raises in numpy and there would be no first
    # path to inspect, so an empty batch only reports its size.
    if len(paths) == 0:
        statistics['Num Paths'] = 0
        return statistics

    returns = [sum(path["rewards"]) for path in paths]
    rewards = np.vstack([path["rewards"] for path in paths])
    statistics.update(create_stats_ordered_dict('Rewards', rewards,
                                                stat_prefix=stat_prefix))
    statistics.update(create_stats_ordered_dict('Returns', returns,
                                                stat_prefix=stat_prefix))

    actions = [path["actions"] for path in paths]
    # One-dimensional action arrays are joined end to end; anything else is
    # stacked row-wise.
    if len(actions[0].shape) == 1:
        actions = np.hstack(actions)
    else:
        actions = np.vstack(actions)
    statistics.update(create_stats_ordered_dict(
        'Actions', actions, stat_prefix=stat_prefix
    ))

    statistics['Num Paths'] = len(paths)
    return statistics
def get_average_returns(paths):
    """
    Return the mean, across paths, of each path's summed ``"rewards"``.
    """
    per_path_totals = []
    for trajectory in paths:
        per_path_totals.append(sum(trajectory["rewards"]))
    return np.mean(per_path_totals)
def get_path_lengths(paths):
    """
    Return a list with the length of each path's ``'observations'`` entry.
    """
    lengths = []
    for trajectory in paths:
        lengths.append(len(trajectory['observations']))
    return lengths
def get_stat_in_paths(paths, dict_name, scalar_name):
    """
    Collect the ``scalar_name`` entry stored under ``dict_name`` in each path.

    The layout is decided from the first path only:

    * if ``path[dict_name]`` is a dict (or a dict subclass), the result is
      ``path[dict_name][scalar_name]`` for every path;
    * otherwise ``path[dict_name]`` is iterated and ``scalar_name`` is read
      from each item.

    :param paths: Sized, indexable collection of paths.
    :param dict_name: Key looked up in each path.
    :param scalar_name: Key looked up inside the ``dict_name`` entry.
    :return: A list with one element per path, or ``np.array([[]])`` when
        ``paths`` is empty.
    """
    if len(paths) == 0:
        return np.array([[]])

    # isinstance (rather than an exact type comparison) so that dict
    # subclasses such as OrderedDict take the dict branch instead of being
    # iterated key by key.
    if isinstance(paths[0][dict_name], dict):
        # Support rllab interface
        return [path[dict_name][scalar_name] for path in paths]

    return [
        [info[scalar_name] for info in path[dict_name]]
        for path in paths
    ]
def get_asset_full_path(file_name):
    """
    Return ``file_name`` joined onto the ``ENV_ASSET_DIR`` directory.
    """
    asset_path = os.path.join(ENV_ASSET_DIR, file_name)
    return asset_path
def concatenate_box_spaces(*spaces):
    """
    Build one Box whose bounds are the given spaces' bounds joined in order.

    Assumes all spaces share the same dtype; the returned Box is always
    created with ``dtype=np.float32``.
    """
    lower_bounds = []
    upper_bounds = []
    for space in spaces:
        lower_bounds.append(space.low)
        upper_bounds.append(space.high)
    joined_low = np.concatenate(lower_bounds)
    joined_high = np.concatenate(upper_bounds)
    return Box(low=joined_low, high=joined_high, dtype=np.float32)