diff --git a/changes/1642.fix.md b/changes/1642.fix.md
new file mode 100644
index 0000000000..cd5f50ad3d
--- /dev/null
+++ b/changes/1642.fix.md
@@ -0,0 +1 @@
+Lower the maximum allowed length of model service names to fix model service sessions failing to be created when the service name is longer than 28 characters
diff --git a/src/ai/backend/manager/api/service.py b/src/ai/backend/manager/api/service.py
index 4479604cf7..51cfd36c03 100644
--- a/src/ai/backend/manager/api/service.py
+++ b/src/ai/backend/manager/api/service.py
@@ -166,7 +166,7 @@ async def get_info(request: web.Request) -> web.Response:
     t.Dict(
         {
             tx.AliasedKey(["name", "service_name", "clientSessionToken"])
-            >> "service_name": t.Regexp(r"^(?=.{4,64}$)\w[\w.-]*\w$", re.ASCII),
+            >> "service_name": t.Regexp(r"^(?=.{4,24}$)\w[\w.-]*\w$", re.ASCII),
             tx.AliasedKey(["desired_session_count", "desiredSessionCount"]): t.Int,
             tx.AliasedKey(["image", "lang"]): t.String,
             tx.AliasedKey(["arch", "architecture"], default=DEFAULT_IMAGE_ARCH)
diff --git a/src/ai/backend/manager/models/base.py b/src/ai/backend/manager/models/base.py
index a2498cd9a3..895708d462 100644
--- a/src/ai/backend/manager/models/base.py
+++ b/src/ai/backend/manager/models/base.py
@@ -1063,3 +1063,14 @@ async def populate_fixture(
                     for row in rows:
                         row[col.name] = col.type._schema.from_json(row[col.name])
             await conn.execute(sa.dialects.postgresql.insert(table, rows).on_conflict_do_nothing())
+
+
+class InferenceSessionError(graphene.ObjectType):
+    class InferenceSessionErrorInfo(graphene.ObjectType):
+        src = graphene.String(required=True)
+        name = graphene.String(required=True)
+        repr = graphene.String(required=True)
+
+    session_id = graphene.UUID()
+
+    errors = graphene.List(graphene.NonNull(InferenceSessionErrorInfo), required=True)
diff --git a/src/ai/backend/manager/models/endpoint.py b/src/ai/backend/manager/models/endpoint.py
index c889f2e14e..dc402f14ea 100644
--- a/src/ai/backend/manager/models/endpoint.py
+++ b/src/ai/backend/manager/models/endpoint.py
@@ -26,6 +26,7 @@
     EnumValueType,
     ForeignKeyIDColumn,
     IDColumn,
+    InferenceSessionError,
     Item,
     PaginatedList,
     ResourceSlotColumn,
@@ -354,17 +355,6 @@ async def get(
         return row
 
 
-class InferenceSessionError(graphene.ObjectType):
-    class InferenceSessionErrorInfo(graphene.ObjectType):
-        src = graphene.String(required=True)
-        name = graphene.String(required=True)
-        repr = graphene.String(required=True)
-
-    session_id = graphene.UUID()
-
-    errors = graphene.List(graphene.NonNull(InferenceSessionErrorInfo), required=True)
-
-
 class Endpoint(graphene.ObjectType):
     class Meta:
         interfaces = (Item,)
diff --git a/src/ai/backend/manager/models/routing.py b/src/ai/backend/manager/models/routing.py
index 3bca37b8a3..c8bfaba29b 100644
--- a/src/ai/backend/manager/models/routing.py
+++ b/src/ai/backend/manager/models/routing.py
@@ -1,7 +1,7 @@
 import logging
 import uuid
 from enum import Enum
-from typing import TYPE_CHECKING, Optional, Sequence
+from typing import TYPE_CHECKING, Any, Optional, Sequence
 
 import graphene
 import sqlalchemy as sa
@@ -14,7 +14,7 @@
 from ai.backend.common.logging_utils import BraceStyleAdapter
 
 from ..api.exceptions import RoutingNotFound
-from .base import GUID, Base, EnumValueType, IDColumn, Item, PaginatedList
+from .base import GUID, Base, EnumValueType, IDColumn, InferenceSessionError, Item, PaginatedList
 
 if TYPE_CHECKING:
     # from .gql import GraphQueryContext
@@ -209,6 +209,7 @@ class Meta:
     status = graphene.String()
     traffic_ratio = graphene.Float()
     created_at = GQLDateTime()
+    error = InferenceSessionError()
     error_data = graphene.JSONString()
 
     @classmethod
@@ -329,6 +330,24 @@ async def load_item(
             raise RoutingNotFound
         return await Routing.from_row(ctx, row)
 
+    async def resolve_error(self, info: graphene.ResolveInfo) -> Any:
+        if self.status != RouteStatus.FAILED_TO_START or not self.error_data:
+            return None
+        match self.error_data["type"]:
+            case "session_cancelled":
+                session_id = self.error_data["session_id"]
+            case _:
+                session_id = None
+        return InferenceSessionError(
+            session_id=session_id,
+            errors=[
+                InferenceSessionError.InferenceSessionErrorInfo(
+                    src=e["src"], name=e["name"], repr=e["repr"]
+                )
+                for e in self.error_data["errors"]
+            ],
+        )
+
 
 class RoutingList(graphene.ObjectType):
     class Meta:
diff --git a/src/ai/backend/manager/registry.py b/src/ai/backend/manager/registry.py
index 0394aa312b..61f53aae1f 100644
--- a/src/ai/backend/manager/registry.py
+++ b/src/ai/backend/manager/registry.py
@@ -3689,7 +3689,7 @@ async def handle_route_creation(
         )
 
         await context.create_session(
-            f"{endpoint.name}-{uuid.uuid4()}",
+            f"{endpoint.name}-{str(event.route_id)}",
            endpoint.image_row.name,
            endpoint.image_row.architecture,
            UserScope(
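
Reviewer note: below is a minimal sanity-check sketch (not part of the patch) for why the service-name cap drops to 24 characters. It reuses the new `^(?=.{4,24}$)\w[\w.-]*\w$` rule from `api/service.py` and the `f"{endpoint.name}-{route_id}"` naming added in `registry.py`; the 64-character ceiling assumed for the derived session name is inferred from the previous `{4,64}` regexp and is not stated in this diff.

```python
import re
import uuid

# The tightened service-name rule from api/service.py in this diff:
# 4-24 ASCII word characters plus '.' and '-', starting and ending with a word char.
SERVICE_NAME_RX = re.compile(r"^(?=.{4,24}$)\w[\w.-]*\w$", re.ASCII)


def derived_session_name(service_name: str, route_id: uuid.UUID) -> str:
    # registry.py now names the inference session "<endpoint name>-<route id>"
    # (see the last hunk, which replaces uuid.uuid4() with the route id).
    return f"{service_name}-{route_id}"


if __name__ == "__main__":
    ok = "stable-diffusion-svc"                  # 20 chars -> accepted
    too_long = "a-very-long-model-service-name"  # 30 chars -> rejected by the new rule
    print(bool(SERVICE_NAME_RX.match(ok)), bool(SERVICE_NAME_RX.match(too_long)))

    # With a maximum-length 24-char service name, the derived session name is
    # 24 + 1 + 36 = 61 characters, which stays under a presumed 64-char
    # session-name limit (the bound used by the previous regexp).
    print(len(derived_session_name("x" * 24, uuid.uuid4())))  # 61
```

Under that assumption, even a maximum-length service name plus the 36-character route-id suffix fits within the session-name bound, which is what the lowered limit is meant to guarantee.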