lablup · kyujin-cho · Nov 1, 2023 · Oct 20, 2023 · Oct 27, 2023 · Oct 27, 2023
diff --git a/changes/1642.fix.md b/changes/1642.fix.md
@@ -0,0 +1 @@
+Lower the maximum allowed length of a model service name to 24 characters, fixing model service sessions that refuse to be created when the service name is longer than 28 characters
diff --git a/src/ai/backend/manager/api/service.py b/src/ai/backend/manager/api/service.py
@@ -166,7 +166,7 @@ async def get_info(request: web.Request) -> web.Response:
     t.Dict(
         {
             tx.AliasedKey(["name", "service_name", "clientSessionToken"])
-            >> "service_name": t.Regexp(r"^(?=.{4,64}$)\w[\w.-]*\w$", re.ASCII),
+            >> "service_name": t.Regexp(r"^(?=.{4,24}$)\w[\w.-]*\w$", re.ASCII),
             tx.AliasedKey(["desired_session_count", "desiredSessionCount"]): t.Int,
             tx.AliasedKey(["image", "lang"]): t.String,
             tx.AliasedKey(["arch", "architecture"], default=DEFAULT_IMAGE_ARCH)

diff --git a/src/ai/backend/manager/models/base.py b/src/ai/backend/manager/models/base.py
@@ -1063,3 +1063,14 @@ async def populate_fixture(
                     for row in rows:
                         row[col.name] = col.type._schema.from_json(row[col.name])
             await conn.execute(sa.dialects.postgresql.insert(table, rows).on_conflict_do_nothing())
+
+
+class InferenceSessionError(graphene.ObjectType):
+    class InferenceSessionErrorInfo(graphene.ObjectType):
+        src = graphene.String(required=True)
+        name = graphene.String(required=True)
+        repr = graphene.String(required=True)
+
+    session_id = graphene.UUID()
+
+    errors = graphene.List(graphene.NonNull(InferenceSessionErrorInfo), required=True)
diff --git a/src/ai/backend/manager/models/endpoint.py b/src/ai/backend/manager/models/endpoint.py
@@ -26,6 +26,7 @@
     EnumValueType,
     ForeignKeyIDColumn,
     IDColumn,
+    InferenceSessionError,
     Item,
     PaginatedList,
     ResourceSlotColumn,
@@ -354,17 +355,6 @@ async def get(
         return row
 
 
-class InferenceSessionError(graphene.ObjectType):
-    class InferenceSessionErrorInfo(graphene.ObjectType):
-        src = graphene.String(required=True)
-        name = graphene.String(required=True)
-        repr = graphene.String(required=True)
-
-    session_id = graphene.UUID()
-
-    errors = graphene.List(graphene.NonNull(InferenceSessionErrorInfo), required=True)
-
-
 class Endpoint(graphene.ObjectType):
     class Meta:
         interfaces = (Item,)

diff --git a/src/ai/backend/manager/models/routing.py b/src/ai/backend/manager/models/routing.py
@@ -1,7 +1,7 @@
 import logging
 import uuid
 from enum import Enum
-from typing import TYPE_CHECKING, Optional, Sequence
+from typing import TYPE_CHECKING, Any, Optional, Sequence
 
 import graphene
 import sqlalchemy as sa
@@ -14,7 +14,7 @@
 from ai.backend.common.logging_utils import BraceStyleAdapter
 
 from ..api.exceptions import RoutingNotFound
-from .base import GUID, Base, EnumValueType, IDColumn, Item, PaginatedList
+from .base import GUID, Base, EnumValueType, IDColumn, InferenceSessionError, Item, PaginatedList
 
 if TYPE_CHECKING:
     # from .gql import GraphQueryContext
@@ -209,6 +209,7 @@ class Meta:
     status = graphene.String()
     traffic_ratio = graphene.Float()
     created_at = GQLDateTime()
+    error = InferenceSessionError()
     error_data = graphene.JSONString()
 
     @classmethod
@@ -329,6 +330,24 @@ async def load_item(
             raise RoutingNotFound
         return await Routing.from_row(ctx, row)
 
+    async def resolve_error(self, info: graphene.ResolveInfo) -> Any:
+        if self.status != RouteStatus.FAILED_TO_START or not self.error_data:
+            return None
+        match self.error_data["type"]:
+            case "session_cancelled":
+                session_id = self.error_data["session_id"]
+            case _:
+                session_id = None
+        return InferenceSessionError(
+            session_id=session_id,
+            errors=[
+                InferenceSessionError.InferenceSessionErrorInfo(
+                    src=e["src"], name=e["name"], repr=e["repr"]
+                )
+                for e in self.error_data["errors"]
+            ],
+        )
+
 
 class RoutingList(graphene.ObjectType):
     class Meta:

diff --git a/src/ai/backend/manager/registry.py b/src/ai/backend/manager/registry.py
@@ -3689,7 +3689,7 @@ async def handle_route_creation(
             )
 
             await context.create_session(
-                f"{endpoint.name}-{uuid.uuid4()}",
+                f"{endpoint.name}-{str(event.route_id)}",
                 endpoint.image_row.name,
                 endpoint.image_row.architecture,
                 UserScope(
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Lower the maximum allowed length of a model service name to 24 characters, fixing model service sessions that refuse to be created when the service name is longer than 28 characters