Skip to content

Commit

Permalink
fix: SIGINT worker processes when process_manager starts
Browse files Browse the repository at this point in the history
Without this change, I found that workers could get stuck on a long-running task.
When the process manager shuts down, worker processes are now sent a SIGINT, and that signal is handled
in the receiver by cancelling the task group so that the prefetcher & runner exit.
  • Loading branch information
sminnee committed Sep 4, 2024
1 parent ee4e5e6 commit bcd7c63
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 28 deletions.
6 changes: 5 additions & 1 deletion taskiq/cli/worker/process_manager.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import os
import signal
import sys
from contextlib import suppress
Expand Down Expand Up @@ -265,7 +266,10 @@ def start(self) -> Optional[int]: # noqa: C901
action.handle(self.workers, self.args, self.worker_function)
reloaded_workers.add(action.worker_num)
elif isinstance(action, ShutdownAction):
logger.debug("Process manager closed.")
logger.debug("Process manager closed, killing workers.")
for worker in self.workers:
if worker.pid:
os.kill(worker.pid, signal.SIGINT)
return None

for worker_num, worker in enumerate(self.workers):
Expand Down
63 changes: 36 additions & 27 deletions taskiq/receiver/receiver.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import asyncio
import inspect
import signal
from concurrent.futures import Executor
from logging import getLogger
from time import time
Expand Down Expand Up @@ -334,6 +335,12 @@ async def listen(self) -> None: # pragma: no cover
gr.start_soon(self.prefetcher, queue)
gr.start_soon(self.runner, queue)

# Propagate cancellation to the prefetcher & runner
def _cancel(*_: Any) -> None:
gr.cancel_scope.cancel()

signal.signal(signal.SIGINT, _cancel)

if self.on_exit is not None:
self.on_exit(self)

Expand Down Expand Up @@ -361,9 +368,7 @@ async def prefetcher(
message = await iterator.__anext__()
fetched_tasks += 1
await queue.put(message)
except asyncio.CancelledError:
break
except StopAsyncIteration:
except (asyncio.CancelledError, StopAsyncIteration):
break

await queue.put(QUEUE_DONE)
Expand Down Expand Up @@ -394,31 +399,35 @@ def task_cb(task: "asyncio.Task[Any]") -> None:
self.sem.release()

while True:
# Waits for semaphore to be released.
if self.sem is not None:
await self.sem.acquire()

self.sem_prefetch.release()
message = await queue.get()
if message is QUEUE_DONE:
# asyncio.wait will throw an error if there is nothing to wait for
if tasks:
logger.info("Waiting for running tasks to complete.")
await asyncio.wait(tasks, timeout=self.wait_tasks_timeout)
break
try:
# Waits for semaphore to be released.
if self.sem is not None:
await self.sem.acquire()

self.sem_prefetch.release()
message = await queue.get()
if message is QUEUE_DONE:
# asyncio.wait will throw an error if there is nothing to wait for
if tasks:
logger.info("Waiting for running tasks to complete.")
await asyncio.wait(tasks, timeout=self.wait_tasks_timeout)
break

task = asyncio.create_task(
self.callback(message=message, raise_err=False),
)
tasks.add(task)

# We want the task to remove itself from the set when it's done.
#
# Because if we don't save it anywhere,
# Python's GC can silently cancel the task,
# and this behaviour is considered to be a Heisenbug.
# https://textual.textualize.io/blog/2023/02/11/the-heisenbug-lurking-in-your-async-code/
task.add_done_callback(task_cb)
task = asyncio.create_task(
self.callback(message=message, raise_err=False),
)
tasks.add(task)

# We want the task to remove itself from the set when it's done.
#
# Because if we don't save it anywhere,
# Python's GC can silently cancel the task,
# and this behaviour is considered to be a Heisenbug.
# https://textual.textualize.io/blog/2023/02/11/the-heisenbug-lurking-in-your-async-code/
task.add_done_callback(task_cb)

except asyncio.CancelledError:
break

def _prepare_task(self, name: str, handler: Callable[..., Any]) -> None:
"""
Expand Down

0 comments on commit bcd7c63

Please sign in to comment.