Skip to content

Commit

Permalink
revert proc_uid
Browse files (browse the repository at this point in the history)
  • Loading branch information
fregataa committed Jan 24, 2025
1 parent fa0c55e commit 08c0f58
Show file tree
Hide file tree
Showing 5 changed files with 5 additions and 26 deletions.
9 changes: 1 addition & 8 deletions src/ai/backend/agent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,6 @@ def __init__(
distro: str,
local_config: Mapping[str, Any],
computers: MutableMapping[DeviceName, ComputerContext],
proc_uid: int,
restarting: bool = False,
) -> None:
self.image_labels = kernel_config["image"]["labels"]
Expand All @@ -254,7 +253,6 @@ def __init__(
self.computers = computers
self.restarting = restarting
self.local_config = local_config
self.proc_uid = proc_uid

@abstractmethod
async def get_extra_envs(self) -> Mapping[str, str]:
Expand Down Expand Up @@ -575,7 +573,6 @@ def mount_static_binary(filename: str, target_path: str) -> None:
already_injected_hooks.add(hook_path)

self.additional_allowed_syscalls = sorted(list(additional_allowed_syscalls_set))
environ["ADDITIONAL_GIDS"] = ",".join(map(str, additional_gid_set))
update_additional_gids(environ, additional_gids)

def get_overriding_uid(self) -> Optional[int]:
Expand Down Expand Up @@ -619,7 +616,6 @@ class AbstractAgent(
computers: MutableMapping[DeviceName, ComputerContext]
images: Mapping[str, str]
port_pool: Set[int]
proc_uid: int

redis: Redis

Expand Down Expand Up @@ -675,7 +671,6 @@ def __init__(
local_config["container"]["port-range"][1] + 1,
)
)
self.proc_uid = os.geteuid()
self.stats_monitor = stats_monitor
self.error_monitor = error_monitor
self._pending_creation_tasks = defaultdict(set)
Expand Down Expand Up @@ -1761,7 +1756,6 @@ async def init_kernel_context(
kernel_image: ImageRef,
kernel_config: KernelCreationConfig,
*,
proc_uid: int,
restarting: bool = False,
cluster_ssh_port_mapping: Optional[ClusterSSHPortMapping] = None,
) -> AbstractKernelCreationContext:
Expand Down Expand Up @@ -1888,7 +1882,6 @@ async def create_kernel(
kernel_image,
kernel_config,
restarting=restarting,
proc_uid=self.proc_uid,
cluster_ssh_port_mapping=cluster_info.get("cluster_ssh_port_mapping"),
)
environ: dict[str, str] = {**kernel_config["environ"]}
Expand All @@ -1911,10 +1904,10 @@ async def create_kernel(
if KernelFeatures.UID_MATCH in ctx.kernel_features:
environ["LOCAL_GROUP_ID"] = str(kernel_gid)

update_additional_gids(environ, sgids)
environ.update(
await ctx.get_extra_envs(),
)
update_additional_gids(environ, sgids)
image_labels = kernel_config["image"]["labels"]

agent_architecture = get_arch_name()
Expand Down
6 changes: 1 addition & 5 deletions src/ai/backend/agent/docker/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,6 @@ def __init__(
agent_sockpath: Path,
resource_lock: asyncio.Lock,
network_plugin_ctx: NetworkPluginContext,
proc_uid: int,
restarting: bool = False,
cluster_ssh_port_mapping: Optional[ClusterSSHPortMapping] = None,
gwbridge_subnet: Optional[str] = None,
Expand All @@ -226,7 +225,6 @@ def __init__(
distro,
local_config,
computers,
proc_uid=proc_uid,
restarting=restarting,
)
scratch_dir = (self.local_config["container"]["scratch-root"] / str(kernel_id)).resolve()
Expand Down Expand Up @@ -303,7 +301,7 @@ async def prepare_resource_spec(self) -> Tuple[KernelResourceSpec, Optional[Mapp
return resource_spec, resource_opts

def _chown(self, paths: Iterable[Path], uid: Optional[int], gid: Optional[int]) -> None:
if self.proc_uid == 0: # only possible when I am root.
if os.geteuid() == 0: # only possible when I am root.
for p in paths:
if KernelFeatures.UID_MATCH in self.kernel_features:
_uid = uid if uid is not None else self.local_config["container"]["kernel-uid"]
Expand Down Expand Up @@ -1706,7 +1704,6 @@ async def init_kernel_context(
kernel_image: ImageRef,
kernel_config: KernelCreationConfig,
*,
proc_uid: int,
restarting: bool = False,
cluster_ssh_port_mapping: Optional[ClusterSSHPortMapping] = None,
) -> DockerKernelCreationContext:
Expand All @@ -1725,7 +1722,6 @@ async def init_kernel_context(
self.agent_sockpath,
self.resource_lock,
self.network_plugin_ctx,
proc_uid=proc_uid,
restarting=restarting,
cluster_ssh_port_mapping=cluster_ssh_port_mapping,
gwbridge_subnet=self.gwbridge_subnet,
Expand Down
4 changes: 0 additions & 4 deletions src/ai/backend/agent/dummy/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ def __init__(
distro: str,
local_config: Mapping[str, Any],
computers: MutableMapping[DeviceName, ComputerContext],
proc_uid: int,
restarting: bool = False,
*,
dummy_config: Mapping[str, Any],
Expand All @@ -77,7 +76,6 @@ def __init__(
distro,
local_config,
computers,
proc_uid=proc_uid,
restarting=restarting,
)
self.dummy_config = dummy_config
Expand Down Expand Up @@ -322,7 +320,6 @@ async def init_kernel_context(
kernel_image: ImageRef,
kernel_config: KernelCreationConfig,
*,
proc_uid: int,
restarting: bool = False,
cluster_ssh_port_mapping: Optional[ClusterSSHPortMapping] = None,
) -> DummyKernelCreationContext:
Expand All @@ -337,7 +334,6 @@ async def init_kernel_context(
distro,
self.local_config,
self.computers,
proc_uid=proc_uid,
restarting=restarting,
dummy_config=self.dummy_config,
)
Expand Down
4 changes: 0 additions & 4 deletions src/ai/backend/agent/kubernetes/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,6 @@ def __init__(
computers: MutableMapping[DeviceName, ComputerContext],
workers: Mapping[str, Mapping[str, str]],
static_pvc_name: str,
proc_uid: int,
restarting: bool = False,
) -> None:
super().__init__(
Expand All @@ -125,7 +124,6 @@ def __init__(
distro,
local_config,
computers,
proc_uid=proc_uid,
restarting=restarting,
)
scratch_dir = (self.local_config["container"]["scratch-root"] / str(kernel_id)).resolve()
Expand Down Expand Up @@ -1040,7 +1038,6 @@ async def init_kernel_context(
kernel_image: ImageRef,
kernel_config: KernelCreationConfig,
*,
proc_uid: int,
restarting: bool = False,
cluster_ssh_port_mapping: Optional[ClusterSSHPortMapping] = None,
) -> KubernetesKernelCreationContext:
Expand All @@ -1058,7 +1055,6 @@ async def init_kernel_context(
self.computers,
self.workers,
"backend-ai-static-pvc",
proc_uid=proc_uid,
restarting=restarting,
)

Expand Down
8 changes: 3 additions & 5 deletions src/ai/backend/manager/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -626,9 +626,7 @@ async def create_session(
{
"uid": user_row.container_uid,
"main_gid": user_row.container_main_gid,
"supplementary_gids": (
user_row.container_supplementary_gids or []
),
"supplementary_gids": (user_row.container_gids or []),
"image_ref": image_ref,
"cluster_role": DEFAULT_ROLE,
"cluster_idx": 1,
Expand Down Expand Up @@ -1334,7 +1332,7 @@ async def enqueue_session(
),
"uid": kernel["uid"],
"main_gid": kernel["main_gid"],
"supplementary_gids": kernel["supplementary_gids"],
"gids": kernel["supplementary_gids"],
"image": image_ref.canonical,
# "image_id": image_row.id,
"architecture": image_ref.architecture,
Expand Down Expand Up @@ -1854,7 +1852,7 @@ def get_image_conf(kernel: KernelRow) -> ImageConfig:
"cluster_hostname": binding.kernel.cluster_hostname,
"uid": binding.kernel.uid,
"main_gid": binding.kernel.main_gid,
"supplementary_gids": binding.kernel.supplementary_gids or [],
"supplementary_gids": binding.kernel.gids or [],
"idle_timeout": int(idle_timeout),
"mounts": [item.to_json() for item in scheduled_session.vfolder_mounts],
"environ": {
Expand Down

0 comments on commit 08c0f58

Please sign in to comment.