From 23133a54a38c372ec96821b055adee81f0e8aff2 Mon Sep 17 00:00:00 2001 From: Gyubong Lee Date: Tue, 30 Apr 2024 04:45:26 +0000 Subject: [PATCH] feat: Add `hot_clients_ips_maxsize`, `hot_clients_ips_threshold_ratio` --- changes/2087.feature.md | 1 + src/ai/backend/manager/api/ratelimit.py | 27 +++++++++++++++++-------- src/ai/backend/manager/config.py | 7 ++++++- 3 files changed, 26 insertions(+), 9 deletions(-) create mode 100644 changes/2087.feature.md diff --git a/changes/2087.feature.md b/changes/2087.feature.md new file mode 100644 index 0000000000..03412d8aa4 --- /dev/null +++ b/changes/2087.feature.md @@ -0,0 +1 @@ +Add `hot_clients_ips_maxsize`, `hot_clients_ips_threshold_ratio` diff --git a/src/ai/backend/manager/api/ratelimit.py b/src/ai/backend/manager/api/ratelimit.py index 7bae230b27..6bdee5b3ab 100644 --- a/src/ai/backend/manager/api/ratelimit.py +++ b/src/ai/backend/manager/api/ratelimit.py @@ -50,15 +50,15 @@ if id_type == "ip" then local rate_limit = tonumber(ARGV[3]) - local score_threshold = rate_limit * 0.8 + local hot_clients_ips_maxsize = tonumber(ARGV[4]) + local hot_clients_ips_threshold_ratio = tonumber(ARGV[5]) - -- Add IP to hot_clients_ips only if count is greater than score_threshold - if rolling_count >= score_threshold then + -- Add the IP address to "hot_clients_ips" only if rolling_count is greater than or equal to the threshold + if rolling_count >= rate_limit * hot_clients_ips_threshold_ratio then redis.call('ZADD', 'hot_clients_ips', rolling_count, id_value) - local max_size = 1000 local current_size = redis.call('ZCARD', 'hot_clients_ips') - if current_size > max_size then + if current_size > hot_clients_ips_maxsize then redis.call('ZREMRANGEBYRANK', 'hot_clients_ips', 0, 0) end end @@ -103,23 +103,34 @@ async def rlim_middleware( return response else: root_ctx: RootContext = app["_root.context"] - rate_limit = root_ctx.shared_config["anonymous_ratelimit"] + anonymous_ratelimiter = root_ctx.shared_config["anonymous_ratelimiter"] ip_address = 
get_client_ip(request) - if not ip_address or rate_limit is None: + if not ip_address or anonymous_ratelimiter is None: # No checks for rate limiting. response = await handler(request) # Arbitrary number for indicating no rate limiting. response.headers["X-RateLimit-Limit"] = "1000" response.headers["X-RateLimit-Remaining"] = "1000" else: + rate_limit, hot_clients_ips_maxsize, hot_clients_ips_threshold_ratio = ( + anonymous_ratelimiter["rlimit"], + anonymous_ratelimiter["hot_clients_ips_maxsize"], + anonymous_ratelimiter["hot_clients_ips_threshold_ratio"], + ) ret = await redis_helper.execute_script( rr, "ratelimit", _rlim_script, ["ip", ip_address], - [str(now), str(_rlim_window), str(rate_limit)], + [ + str(now), + str(_rlim_window), + str(rate_limit), + str(hot_clients_ips_maxsize), + str(hot_clients_ips_threshold_ratio), + ], ) if ret is None: remaining = rate_limit diff --git a/src/ai/backend/manager/config.py b/src/ai/backend/manager/config.py index 067717fcc3..eec0e7f11a 100644 --- a/src/ai/backend/manager/config.py +++ b/src/ai/backend/manager/config.py @@ -465,7 +465,12 @@ def container_registry_serialize(v: dict[str, Any]) -> dict[str, str]: }, ).allow_extra("*"), t.Key("roundrobin_states", default=None): t.Null | tx.RoundRobinStatesJSONString, - t.Key("anonymous_ratelimit", default=None): t.Null | t.ToInt, + t.Key("anonymous_ratelimiter", default=None): t.Null + | t.Dict({ + t.Key("rlimit"): t.ToInt(), + t.Key("hot_clients_ips_maxsize", default=1000): t.Null | t.ToInt(), + t.Key("hot_clients_ips_threshold_ratio", default=0.8): t.Null | t.ToFloat(), + }), }).allow_extra("*") _volume_defaults: dict[str, Any] = {