diff --git a/src/ai/backend/agent/agent.py b/src/ai/backend/agent/agent.py index f46fcaf0f7..8e3a5dbd7d 100644 --- a/src/ai/backend/agent/agent.py +++ b/src/ai/backend/agent/agent.py @@ -196,6 +196,8 @@ def update_additional_gids(environ: MutableMapping[str, str], gids: Iterable[int]) -> None: + if not gids: + return if orig_additional_gids := environ.get("ADDITIONAL_GIDS"): orig_add_gids = {int(gid) for gid in orig_additional_gids.split(",") if gid} additional_gids = orig_add_gids | set(gids) @@ -574,8 +576,8 @@ def get_overriding_uid(self) -> Optional[int]: def get_overriding_gid(self) -> Optional[int]: return None - def get_supplementary_gids(self) -> set[int]: - return set() + def get_supplementary_gids(self) -> Optional[list[int]]: + return None KernelCreationContextType = TypeVar( @@ -1879,17 +1881,14 @@ async def create_kernel( uid = self.local_config["container"]["kernel-uid"] environ["LOCAL_USER_ID"] = str(uid) - # The first element of the `gids` list becomes the kernel's main gid. - # Elements after the first one become supplementary gids. - gids: list[int] = [] if (ogid := ctx.get_overriding_gid()) is not None: - gids.append(ogid) - if KernelFeatures.UID_MATCH in ctx.kernel_features: - gids.append(self.local_config["container"]["kernel-gid"]) - if gids: - environ["LOCAL_GROUP_ID"] = str(gids.pop(0)) + environ["LOCAL_GROUP_ID"] = str(ogid) + else: + if KernelFeatures.UID_MATCH in ctx.kernel_features: + gid = self.local_config["container"]["kernel-gid"] + environ["LOCAL_GROUP_ID"] = str(gid) - update_additional_gids(environ, [*gids, *ctx.get_supplementary_gids()]) + update_additional_gids(environ, ctx.get_supplementary_gids() or []) environ.update( await ctx.get_extra_envs(), ) diff --git a/src/ai/backend/agent/docker/agent.py b/src/ai/backend/agent/docker/agent.py index ab062f3d56..e8bda69dfc 100644 --- a/src/ai/backend/agent/docker/agent.py +++ b/src/ai/backend/agent/docker/agent.py @@ -236,7 +236,8 @@ def __init__( self.config_dir = scratch_dir / "config" self.work_dir = scratch_dir / "work" self.uid = kernel_config["uid"] - self.gids = kernel_config["gids"] + self.main_gid = kernel_config["main_gid"] + self.supplementary_gids = kernel_config["supplementary_gids"] self.port_pool = port_pool self.agent_sockpath = agent_sockpath @@ -257,17 +258,11 @@ def get_overriding_uid(self) -> Optional[int]: @override def get_overriding_gid(self) -> Optional[int]: - if self.gids: - return self.gids[0] - else: - return None + return self.main_gid @override - def get_supplementary_gids(self) -> set[int]: - if self.gids is not None: - return set(self.gids[1:]) - else: - return set() + def get_supplementary_gids(self) -> Optional[list[int]]: + return self.supplementary_gids def _kernel_resource_spec_read(self, filename): with open(filename, "r") as f: diff --git a/src/ai/backend/common/types.py b/src/ai/backend/common/types.py index c44a3d8adc..8eb7f9c017 100644 --- a/src/ai/backend/common/types.py +++ b/src/ai/backend/common/types.py @@ -1091,7 +1091,8 @@ class KernelCreationConfig(TypedDict): cluster_idx: int # the kernel's index in the cluster cluster_hostname: str # the kernel's hostname in the cluster uid: Optional[int] - gids: Optional[list[int]] + main_gid: Optional[int] + supplementary_gids: Optional[list[int]] resource_slots: Mapping[str, str] # json form of ResourceSlot resource_opts: Mapping[str, str] # json form of resource options environ: Mapping[str, str] @@ -1123,7 +1124,8 @@ class KernelEnqueueingConfig(TypedDict): bootstrap_script: str startup_command: Optional[str] uid: Optional[int] - gids: Optional[list[int]] + main_gid: Optional[int] + supplementary_gids: Optional[list[int]] def _stringify_number(v: Union[BinarySize, int, float, Decimal]) -> str: diff --git a/src/ai/backend/manager/models/alembic/versions/f6ca2f2d04c1_add_uid_and_gid_columns.py b/src/ai/backend/manager/models/alembic/versions/f6ca2f2d04c1_add_uid_and_gid_columns.py index 338c801180..1aefcb9237 100644 --- a/src/ai/backend/manager/models/alembic/versions/f6ca2f2d04c1_add_uid_and_gid_columns.py +++ b/src/ai/backend/manager/models/alembic/versions/f6ca2f2d04c1_add_uid_and_gid_columns.py @@ -27,7 +27,10 @@ def upgrade() -> None: op.add_column( "kernels", sa.Column( - "additional_gids", sa.ARRAY(sa.Integer()), server_default=sa.text("NULL"), nullable=True + "supplementary_gids", + sa.ARRAY(sa.Integer()), + server_default=sa.text("NULL"), + nullable=True, ), ) op.add_column( @@ -43,7 +46,7 @@ def upgrade() -> None: op.add_column( "users", sa.Column( - "container_additional_gids", + "container_supplementary_gids", sa.ARRAY(sa.Integer()), server_default=sa.text("NULL"), nullable=True, @@ -53,8 +56,8 @@ def upgrade() -> None: def downgrade() -> None: op.drop_column("users", "container_main_gid") - op.drop_column("users", "container_additional_gids") + op.drop_column("users", "container_supplementary_gids") op.drop_column("users", "container_uid") - op.drop_column("kernels", "additional_gids") + op.drop_column("kernels", "supplementary_gids") op.drop_column("kernels", "main_gid") op.drop_column("kernels", "uid") diff --git a/src/ai/backend/manager/models/kernel.py b/src/ai/backend/manager/models/kernel.py index 2872ff8a42..f35156d8a4 100644 --- a/src/ai/backend/manager/models/kernel.py +++ b/src/ai/backend/manager/models/kernel.py @@ -418,8 +418,8 @@ class KernelRow(Base): ) uid = sa.Column("uid", sa.Integer, nullable=True, server_default=sa.null()) main_gid = sa.Column("main_gid", sa.Integer, nullable=True, server_default=sa.null()) - additional_gids = sa.Column( - "additional_gids", sa.ARRAY(sa.Integer), nullable=True, server_default=sa.null() + supplementary_gids = sa.Column( + "supplementary_gids", sa.ARRAY(sa.Integer), nullable=True, server_default=sa.null() ) # Resource ownership diff --git a/src/ai/backend/manager/models/user.py b/src/ai/backend/manager/models/user.py index a7cd3cb67d..0673b229c8 100644 --- a/src/ai/backend/manager/models/user.py +++ b/src/ai/backend/manager/models/user.py @@ -162,7 +162,10 @@ class UserStatus(enum.StrEnum): sa.Column("container_uid", sa.Integer, nullable=True, server_default=sa.null()), sa.Column("container_main_gid", sa.Integer, nullable=True, server_default=sa.null()), sa.Column( - "container_additional_gids", sa.ARRAY(sa.Integer), nullable=True, server_default=sa.null() + "container_supplementary_gids", + sa.ARRAY(sa.Integer), + nullable=True, + server_default=sa.null(), ), ) diff --git a/src/ai/backend/manager/registry.py b/src/ai/backend/manager/registry.py index 269a515b6c..c6fb26a37d 100644 --- a/src/ai/backend/manager/registry.py +++ b/src/ai/backend/manager/registry.py @@ -620,7 +620,8 @@ async def create_session( "kernel_configs": [ { "uid": sess.user.container_uid, - "gids": sess.user.container_gids, + "main_gid": sess.user.container_main_gid, + "supplementary_gids": sess.user.container_supplementary_gids, "image_ref": image_ref, "cluster_role": DEFAULT_ROLE, "cluster_idx": 1, @@ -1325,7 +1326,8 @@ async def enqueue_session( else kernel["cluster_hostname"] ), "uid": kernel["uid"], - "gids": kernel["gids"], + "main_gid": kernel["main_gid"], + "supplementary_gids": kernel["supplementary_gids"], "image": image_ref.canonical, # "image_id": image_row.id, "architecture": image_ref.architecture, @@ -1844,7 +1846,8 @@ def get_image_conf(kernel: KernelRow) -> ImageConfig: "local_rank": binding.kernel.local_rank, "cluster_hostname": binding.kernel.cluster_hostname, "uid": binding.kernel.uid, - "gids": binding.kernel.gids, + "main_gid": binding.kernel.main_gid, + "supplementary_gids": binding.kernel.supplementary_gids, "idle_timeout": int(idle_timeout), "mounts": [item.to_json() for item in scheduled_session.vfolder_mounts], "environ": {