-
Notifications
You must be signed in to change notification settings - Fork 19
/
Copy pathresources.py
168 lines (128 loc) · 4.82 KB
/
resources.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
import math
from datetime import datetime, timezone
from functools import lru_cache
import cpuinfo
import psutil
from aiohttp import web
from aleph_message.models import ItemHash
from aleph_message.models.execution.environment import CpuProperties
from pydantic import BaseModel, Field
from aleph.vm.conf import settings
from aleph.vm.sevclient import SevClient
from aleph.vm.utils import (
check_amd_sev_es_supported,
check_amd_sev_snp_supported,
check_amd_sev_supported,
cors_allow_all,
)
class Period(BaseModel):
    # Pydantic model wrapping a single timestamp value.
    # NOTE(review): not referenced elsewhere in the visible part of this
    # file; presumably used by other modules — confirm before removing.
    datetime: datetime
class LoadAverage(BaseModel):
    # System load averages, one field per entry of the psutil load tuple.
    load1: float
    load5: float
    load15: float

    @classmethod
    def from_psutil(cls, psutil_loadavg: tuple[float, float, float]):
        """Build a ``LoadAverage`` from a ``psutil.getloadavg()``-style tuple.

        The values are mapped by position: index 0 becomes ``load1``,
        index 1 becomes ``load5`` and index 2 becomes ``load15``.
        """
        first = psutil_loadavg[0]
        second = psutil_loadavg[1]
        third = psutil_loadavg[2]
        return cls(load1=first, load5=second, load15=third)
class CoreFrequencies(BaseModel):
    # Lowest and highest CPU core frequency known for this host.
    min: float
    max: float

    @classmethod
    def from_psutil(cls, psutil_freq: psutil._common.scpufreq):
        """Build a ``CoreFrequencies`` from a psutil CPU frequency record.

        When the reported ``min`` (or ``max``) is falsy, e.g. ``0.0``, the
        ``current`` frequency is used in its place.

        NOTE(review): ``psutil_freq`` is dereferenced unconditionally, so a
        ``None`` argument raises ``AttributeError`` — confirm that
        ``psutil.cpu_freq()`` cannot return ``None`` on supported hosts.
        """
        lowest = psutil_freq.min
        if not lowest:
            lowest = psutil_freq.current

        highest = psutil_freq.max
        if not highest:
            highest = psutil_freq.current

        return cls(min=lowest, max=highest)
class CpuUsage(BaseModel):
    # CPU section of the usage report built by `about_system_usage`.
    count: int  # filled from psutil.cpu_count()
    load_average: LoadAverage  # filled from psutil.getloadavg()
    core_frequencies: CoreFrequencies  # filled from psutil.cpu_freq()
class MemoryUsage(BaseModel):
    # Memory section of the usage report. `about_system_usage` fills both
    # fields from psutil.virtual_memory(), dividing the values by 1000.
    total_kB: int
    available_kB: int
class DiskUsage(BaseModel):
    # Disk section of the usage report. `about_system_usage` fills both
    # fields from psutil.disk_usage() of settings.PERSISTENT_VOLUMES_DIR,
    # integer-dividing the values by 1000.
    total_kB: int
    available_kB: int  # taken from the `free` value of psutil.disk_usage()
class UsagePeriod(BaseModel):
    # Time window that a usage report refers to.
    start_timestamp: datetime  # `about_system_usage` passes the current UTC time truncated to the minute
    duration_seconds: float  # `about_system_usage` always passes 60
class MachineProperties(BaseModel):
    # Static properties of the host, as produced by `get_machine_properties`.
    cpu: CpuProperties  # architecture, vendor and feature list of the CPU
class MachineUsage(BaseModel):
    # Top-level payload serialized to JSON by the `about_system_usage` endpoint.
    cpu: CpuUsage
    mem: MemoryUsage
    disk: DiskUsage
    period: UsagePeriod
    properties: MachineProperties
    # Defaults to True and is never set explicitly in this file.
    # NOTE(review): the exact meaning of "active" is not evident here — confirm with consumers.
    active: bool = True
@lru_cache
def get_machine_properties() -> MachineProperties:
    """Collect the static properties of the host machine (architecture, CPU vendor, ...).

    The result is memoized with ``lru_cache``: querying ``cpuinfo`` is slow and
    these values should not change while the supervisor is running.
    In the future, some properties may have to be fetched from within a VM.
    """
    raw_info = cpuinfo.get_cpu_info()  # Slow

    # Two key spellings are tried for each value; the second one is only a
    # fallback when the first key is absent.
    architecture = raw_info.get("raw_arch_string", raw_info.get("arch_string_raw"))
    vendor = raw_info.get("vendor_id", raw_info.get("vendor_id_raw"))

    # Every probe is called exactly once, in this order; only the labels of
    # the probes that report support are kept.
    sev_probes = (
        ("sev", check_amd_sev_supported),
        ("sev_es", check_amd_sev_es_supported),
        ("sev_snp", check_amd_sev_snp_supported),
    )
    supported_features = [label for label, is_supported in sev_probes if is_supported()]

    cpu_properties = CpuProperties(
        architecture=architecture,
        vendor=vendor,
        features=supported_features,
    )
    return MachineProperties(cpu=cpu_properties)
@cors_allow_all
async def about_system_usage(_: web.Request):
    """Public endpoint to expose information about the system usage.

    The request object is ignored. The handler samples CPU, memory and disk
    figures through ``psutil``, attaches the cached machine properties and
    returns them as a JSON response built from a ``MachineUsage`` model
    (fields that are ``None`` are left out of the output).
    """
    # The reported period starts at the beginning of the current UTC minute.
    period_start = datetime.now(timezone.utc).replace(second=0, microsecond=0)

    # Sample memory and disk once each, so that the "total" and "available"
    # figures of a section come from the same snapshot and the underlying
    # system calls are not issued twice.
    memory = psutil.virtual_memory()
    disk = psutil.disk_usage(str(settings.PERSISTENT_VOLUMES_DIR))

    usage: MachineUsage = MachineUsage(
        cpu=CpuUsage(
            count=psutil.cpu_count(),
            load_average=LoadAverage.from_psutil(psutil.getloadavg()),
            core_frequencies=CoreFrequencies.from_psutil(psutil.cpu_freq()),
        ),
        mem=MemoryUsage(
            # Round the total up and the available amount down.
            total_kB=math.ceil(memory.total / 1000),
            available_kB=math.floor(memory.available / 1000),
        ),
        disk=DiskUsage(
            total_kB=disk.total // 1000,
            available_kB=disk.free // 1000,
        ),
        period=UsagePeriod(
            start_timestamp=period_start,
            duration_seconds=60,
        ),
        properties=get_machine_properties(),
    )

    return web.json_response(text=usage.json(exclude_none=True))
@cors_allow_all
async def about_certificates(request: web.Request):
    """Public endpoint to expose platform certificates for confidential computing.

    When ``settings.ENABLE_CONFIDENTIAL_COMPUTING`` is falsy, an
    ``HTTPBadRequest`` object is returned. Otherwise the SEV client stored
    under ``request.app["sev_client"]`` is asked for the certificates and
    the result is served through ``web.FileResponse``.
    """
    if settings.ENABLE_CONFIDENTIAL_COMPUTING:
        sev_client: SevClient = request.app["sev_client"]
        # presumably a filesystem path, since it is handed to FileResponse — confirm
        certificates = await sev_client.get_certificates()
        return web.FileResponse(certificates)

    # The error object is returned (not raised), exactly as before.
    return web.HTTPBadRequest(reason="Confidential computing setting not enabled on that server")
class Allocation(BaseModel):
    """An allocation is the set of resources that are currently allocated on this orchestrator.
    It contains the item_hashes of all persistent VMs, instances, on-demand VMs and jobs.
    """

    # These two sets default to a fresh empty set for every model instance.
    persistent_vms: set[ItemHash] = Field(default_factory=set)
    instances: set[ItemHash] = Field(default_factory=set)
    # These two are optional and default to None (as opposed to an empty set).
    on_demand_vms: set[ItemHash] | None = None
    jobs: set[ItemHash] | None = None
class VMNotification(BaseModel):
    """A notification to the orchestrator that a VM has been created or destroyed.
    This is typically sent by a user that just created a VM in order to quickly ensure the creation of the VM.
    """

    # Item hash identifying the instance that the notification is about.
    instance: ItemHash