From 79f80d13a9d49e7c750a9d0f6bbeeaa50d34476f Mon Sep 17 00:00:00 2001 From: Kyujin Cho Date: Wed, 8 Jan 2025 14:16:36 +0900 Subject: [PATCH] fix(BA-450): do not set timeout on docker push API execution (#3391) --- changes/3391.fix.md | 1 + src/ai/backend/agent/agent.py | 8 +++++++- src/ai/backend/agent/docker/agent.py | 13 +++++++++++-- src/ai/backend/agent/dummy/agent.py | 9 ++++++++- src/ai/backend/agent/server.py | 1 + 5 files changed, 28 insertions(+), 4 deletions(-) create mode 100644 changes/3391.fix.md diff --git a/changes/3391.fix.md b/changes/3391.fix.md new file mode 100644 index 0000000000..376ad1e626 --- /dev/null +++ b/changes/3391.fix.md @@ -0,0 +1 @@ +Fix certain customized images not being pushed to registry properly diff --git a/src/ai/backend/agent/agent.py b/src/ai/backend/agent/agent.py index e7bd629e47..2b80b980f0 100644 --- a/src/ai/backend/agent/agent.py +++ b/src/ai/backend/agent/agent.py @@ -1616,7 +1616,13 @@ async def _scan_images_wrapper(self, interval: float) -> None: self.images = await self.scan_images() @abstractmethod - async def push_image(self, image_ref: ImageRef, registry_conf: ImageRegistry) -> None: + async def push_image( + self, + image_ref: ImageRef, + registry_conf: ImageRegistry, + *, + timeout: float | None | Sentinel = Sentinel.TOKEN, + ) -> None: """ Push the given image to the given registry. """ diff --git a/src/ai/backend/agent/docker/agent.py b/src/ai/backend/agent/docker/agent.py index 3ff850eaa1..97fdb20423 100644 --- a/src/ai/backend/agent/docker/agent.py +++ b/src/ai/backend/agent/docker/agent.py @@ -1538,7 +1538,13 @@ async def handle_agent_socket(self): else: zmq_ctx.destroy() - async def push_image(self, image_ref: ImageRef, registry_conf: ImageRegistry) -> None: + async def push_image( + self, + image_ref: ImageRef, + registry_conf: ImageRegistry, + *, + timeout: float | None | Sentinel = Sentinel.TOKEN, + ) -> None: if image_ref.is_local: return auth_config = None @@ -1554,7 +1560,10 @@ async def push_image(self, image_ref: ImageRef, registry_conf: ImageRegistry) -> } async with closing_async(Docker()) as docker: - result = await docker.images.push(image_ref.canonical, auth=auth_config) + kwargs: dict[str, Any] = {"auth": auth_config} + if timeout != Sentinel.TOKEN: + kwargs["timeout"] = timeout + result = await docker.images.push(image_ref.canonical, **kwargs) if not result: raise RuntimeError("Failed to push image: unexpected return value from aiodocker") diff --git a/src/ai/backend/agent/dummy/agent.py b/src/ai/backend/agent/dummy/agent.py index 9450a0d34c..b34f6cb038 100644 --- a/src/ai/backend/agent/dummy/agent.py +++ b/src/ai/backend/agent/dummy/agent.py @@ -31,6 +31,7 @@ MountPermission, MountTypes, ResourceSlot, + Sentinel, ServicePort, SessionId, SlotName, @@ -293,7 +294,13 @@ async def pull_image( delay = self.dummy_agent_cfg["delay"]["pull-image"] await asyncio.sleep(delay) - async def push_image(self, image_ref: ImageRef, registry_conf: ImageRegistry) -> None: + async def push_image( + self, + image_ref: ImageRef, + registry_conf: ImageRegistry, + *, + timeout: float | None | Sentinel = Sentinel.TOKEN, + ) -> None: delay = self.dummy_agent_cfg["delay"]["push-image"] await asyncio.sleep(delay) diff --git a/src/ai/backend/agent/server.py b/src/ai/backend/agent/server.py index f473561250..52b4c14726 100644 --- a/src/ai/backend/agent/server.py +++ b/src/ai/backend/agent/server.py @@ -819,6 +819,7 @@ async def _push_image(reporter: ProgressReporter) -> None: await self.agent.push_image( image_ref, registry_conf, + timeout=None, ) task_id = await bgtask_mgr.start(_push_image)