From e869dcc477b40d8dfced0ca5bae7596f2f4faa4d Mon Sep 17 00:00:00 2001 From: Sanghun Lee Date: Thu, 9 Jan 2025 13:51:31 +0900 Subject: [PATCH] fix(BA-432): Enforce VFolder name length restriction through the API schema (#3363) Backported-from: main (24.12) Backported-to: 24.03 Backport-of: 3363 --- changes/3363.fix.md | 1 + src/ai/backend/common/defs.py | 31 ++++++ src/ai/backend/common/typed_validators.py | 30 +++++ src/ai/backend/manager/api/vfolder.py | 105 +++++++++++------- ...4_extend_length_of_vfolders_name_column.py | 36 ++++++ src/ai/backend/manager/models/vfolder.py | 5 +- 6 files changed, 162 insertions(+), 46 deletions(-) create mode 100644 changes/3363.fix.md create mode 100644 src/ai/backend/manager/models/alembic/versions/ef9a7960d234_extend_length_of_vfolders_name_column.py diff --git a/changes/3363.fix.md b/changes/3363.fix.md new file mode 100644 index 0000000000..cb4d34ba9e --- /dev/null +++ b/changes/3363.fix.md @@ -0,0 +1 @@ +Enforce VFolder name length restriction through the API schema, not by the DB column constraint diff --git a/src/ai/backend/common/defs.py b/src/ai/backend/common/defs.py index 9dadda4c90..a7f396ad0b 100644 --- a/src/ai/backend/common/defs.py +++ b/src/ai/backend/common/defs.py @@ -1,3 +1,4 @@ +import re from typing import Final # Redis database IDs depending on purposes @@ -10,3 +11,33 @@ DEFAULT_FILE_IO_TIMEOUT: Final = 10 + +_RESERVED_VFOLDER_PATTERNS = [r"^\.[a-z0-9]+rc$", r"^\.[a-z0-9]+_profile$"] +RESERVED_VFOLDERS = [ + ".terminfo", + ".jupyter", + ".tmux.conf", + ".ssh", + "/bin", + "/boot", + "/dev", + "/etc", + "/lib", + "/lib64", + "/media", + "/mnt", + "/opt", + "/proc", + "/root", + "/run", + "/sbin", + "/srv", + "/sys", + "/tmp", + "/usr", + "/var", + "/home", +] +RESERVED_VFOLDER_PATTERNS = [re.compile(x) for x in _RESERVED_VFOLDER_PATTERNS] +API_VFOLDER_LENGTH_LIMIT: Final[int] = 64 +MODEL_VFOLDER_LENGTH_LIMIT: Final[int] = 128 diff --git a/src/ai/backend/common/typed_validators.py b/src/ai/backend/common/typed_validators.py index 44855a5e5f..2d2bf1bde6 100644 --- a/src/ai/backend/common/typed_validators.py +++ b/src/ai/backend/common/typed_validators.py @@ -11,6 +11,13 @@ from pydantic.json_schema import JsonSchemaValue from pydantic_core import core_schema +from .defs import ( + API_VFOLDER_LENGTH_LIMIT, + MODEL_VFOLDER_LENGTH_LIMIT, + RESERVED_VFOLDER_PATTERNS, + RESERVED_VFOLDERS, +) + TVariousDelta: TypeAlias = datetime.timedelta | relativedelta @@ -156,3 +163,26 @@ def session_name_validator(s: str) -> str: SessionName = Annotated[str, AfterValidator(session_name_validator)] """Validator with extended re.ASCII option to match session name string literal""" + + +def _vfolder_name_validator(name: str) -> str: + f""" + Although the length constraint of the `vfolders.name` column is {MODEL_VFOLDER_LENGTH_LIMIT}, + we limit the length to {API_VFOLDER_LENGTH_LIMIT} in the create/rename API + because we append a timestamp of deletion to the name when VFolders are deleted. + """ + if (name_len := len(name)) > API_VFOLDER_LENGTH_LIMIT: + raise AssertionError( + f"The length of VFolder name should be shorter than {API_VFOLDER_LENGTH_LIMIT}. (len: {name_len})" + ) + if name in RESERVED_VFOLDERS: + raise AssertionError(f"VFolder name '{name}' is reserved for internal operations") + for pattern in RESERVED_VFOLDER_PATTERNS: + if pattern.match(name): + raise AssertionError( + f"VFolder name '{name}' matches a reserved pattern (pattern: {pattern})" + ) + return name + + +VFolderName = Annotated[str, AfterValidator(_vfolder_name_validator)] diff --git a/src/ai/backend/manager/api/vfolder.py b/src/ai/backend/manager/api/vfolder.py index 820157f264..30e305ee10 100644 --- a/src/ai/backend/manager/api/vfolder.py +++ b/src/ai/backend/manager/api/vfolder.py @@ -44,6 +44,7 @@ from sqlalchemy.orm import load_only, selectinload from ai.backend.common import msgpack, redis_helper +from ai.backend.common import typed_validators as tv from ai.backend.common import validators as tx from ai.backend.common.logging import BraceStyleAdapter from ai.backend.common.types import ( @@ -341,20 +342,33 @@ async def _wrapped(request: web.Request, *args: P.args, **kwargs: P.kwargs) -> w return _wrapped +class CreateRequestModel(BaseModel): + name: tv.VFolderName = Field( + description="Name of the vfolder", + ) + folder_host: str | None = Field( + validation_alias=AliasChoices("host", "folder_host"), + default=None, + ) + usage_mode: VFolderUsageMode = Field(default=VFolderUsageMode.GENERAL) + permission: VFolderPermission = Field(default=VFolderPermission.READ_WRITE) + unmanaged_path: str | None = Field( + validation_alias=AliasChoices("unmanaged_path", "unmanagedPath"), + default=None, + ) + group: str | uuid.UUID | None = Field( + validation_alias=AliasChoices("group", "groupId", "group_id"), + default=None, + ) + cloneable: bool = Field( + default=False, + ) + + @auth_required @server_status_required(ALL_ALLOWED) -@check_api_params( - t.Dict({ - t.Key("name"): tx.Slug(allow_dot=True), - t.Key("host", default=None) >> "folder_host": t.String | t.Null, - t.Key("usage_mode", default="general"): tx.Enum(VFolderUsageMode) | t.Null, - t.Key("permission", default="rw"): tx.Enum(VFolderPermission) | t.Null, - tx.AliasedKey(["unmanaged_path", "unmanagedPath"], default=None): t.String | t.Null, - tx.AliasedKey(["group", "groupId", "group_id"], default=None): tx.UUID | t.String | t.Null, - t.Key("cloneable", default=False): t.Bool, - }), -) -async def create(request: web.Request, params: Any) -> web.Response: +@pydantic_params_api_handler(CreateRequestModel) +async def create(request: web.Request, params: CreateRequestModel) -> web.Response: resp: Dict[str, Any] = {} root_ctx: RootContext = request.app["_root.context"] access_key = request["keypair"]["access_key"] @@ -362,18 +376,18 @@ async def create(request: web.Request, params: Any) -> web.Response: user_uuid: uuid.UUID = request["user"]["uuid"] keypair_resource_policy = request["keypair"]["resource_policy"] domain_name = request["user"]["domain_name"] - group_id_or_name = params["group"] + group_id_or_name = params.group log.info( "VFOLDER.CREATE (email:{}, ak:{}, vf:{}, vfh:{}, umod:{}, perm:{})", request["user"]["email"], access_key, - params["name"], - params["folder_host"], - params["usage_mode"].value, - params["permission"].value, + params.name, + params.folder_host, + params.usage_mode.value, + params.permission.value, ) - folder_host = params["folder_host"] - unmanaged_path = params["unmanaged_path"] + folder_host = params.folder_host + unmanaged_path = params.unmanaged_path # Check if user is trying to created unmanaged vFolder if unmanaged_path: # Approve only if user is Admin or Superadmin @@ -390,10 +404,8 @@ async def create(request: web.Request, params: Any) -> web.Response: allowed_vfolder_types = await root_ctx.shared_config.get_vfolder_types() - if not verify_vfolder_name(params["name"]): - raise InvalidAPIParameters(f"{params['name']} is reserved for internal operations.") - if params["name"].startswith(".") and params["name"] != ".local": - if params["group"] is not None: + if params.name.startswith(".") and params.name != ".local": + if params.group is not None: raise InvalidAPIParameters("dot-prefixed vfolders cannot be a group folder.") group_uuid: uuid.UUID | None = None @@ -477,17 +489,18 @@ async def create(request: web.Request, params: Any) -> web.Response: ) if group_type == ProjectType.MODEL_STORE: - if params["permission"] != VFolderPermission.READ_WRITE: + if params.permission != VFolderPermission.READ_WRITE: raise InvalidAPIParameters( "Setting custom permission is not supported for model store vfolder" ) - if params["usage_mode"] != VFolderUsageMode.MODEL: + if params.usage_mode != VFolderUsageMode.MODEL: raise InvalidAPIParameters( "Only Model VFolder can be created under the model store project" ) async with root_ctx.db.begin() as conn: if not unmanaged_path: + assert folder_host is not None await ensure_host_permission_allowed( conn, folder_host, @@ -522,7 +535,7 @@ async def create(request: web.Request, params: Any) -> web.Response: # Prevent creation of vfolder with duplicated name on all hosts. extra_vf_conds = [ - (vfolders.c.name == params["name"]), + (vfolders.c.name == params.name), (vfolders.c.status.not_in(HARD_DELETED_VFOLDER_STATUSES)), ] entries = await query_accessible_vfolders( @@ -534,7 +547,7 @@ async def create(request: web.Request, params: Any) -> web.Response: extra_vf_conds=(sa.and_(*extra_vf_conds)), ) if len(entries) > 0: - raise VFolderAlreadyExists(extra_data=params["name"]) + raise VFolderAlreadyExists(extra_data=params.name) try: folder_id = uuid.uuid4() vfid = VFolderID(quota_scope_id, folder_id) @@ -555,6 +568,7 @@ async def create(request: web.Request, params: Any) -> web.Response: # }, # ): # pass + assert folder_host is not None options = {} if max_quota_scope_size and max_quota_scope_size > 0: options["initial_max_size_for_quota_scope"] = max_quota_scope_size @@ -574,16 +588,17 @@ async def create(request: web.Request, params: Any) -> web.Response: # By default model store VFolder should be considered as read only for every users but without the creator if group_type == ProjectType.MODEL_STORE: - params["permission"] = VFolderPermission.READ_ONLY + params.permission = VFolderPermission.READ_ONLY # TODO: include quota scope ID in the database # TODO: include quota scope ID in the API response insert_values = { "id": vfid.folder_id.hex, - "name": params["name"], + "name": params.name, + "domain_name": domain_name, "quota_scope_id": str(quota_scope_id), - "usage_mode": params["usage_mode"], - "permission": params["permission"], + "usage_mode": params.usage_mode, + "permission": params.permission, "last_used": None, "host": folder_host, "creator": request["user"]["email"], @@ -591,22 +606,22 @@ async def create(request: web.Request, params: Any) -> web.Response: "user": user_uuid if ownership_type == "user" else None, "group": group_uuid if ownership_type == "group" else None, "unmanaged_path": "", - "cloneable": params["cloneable"], + "cloneable": params.cloneable, "status": VFolderOperationStatus.READY, } resp = { "id": vfid.folder_id.hex, - "name": params["name"], + "name": params.name, "quota_scope_id": str(quota_scope_id), "host": folder_host, - "usage_mode": params["usage_mode"].value, - "permission": params["permission"].value, + "usage_mode": params.usage_mode.value, + "permission": params.permission.value, "max_size": 0, # migrated to quota scopes, no longer valid "creator": request["user"]["email"], "ownership_type": ownership_type, "user": str(user_uuid) if ownership_type == "user" else None, "group": str(group_uuid) if ownership_type == "group" else None, - "cloneable": params["cloneable"], + "cloneable": params.cloneable, "status": VFolderOperationStatus.READY, } if unmanaged_path: @@ -1156,16 +1171,20 @@ async def get_used_bytes(request: web.Request, params: Any) -> web.Response: return web.json_response(usage, status=200) +class RenameRequestModel(BaseModel): + new_name: tv.VFolderName = Field( + description="Name of the vfolder", + ) + + @auth_required @server_status_required(ALL_ALLOWED) +@pydantic_params_api_handler(RenameRequestModel) @with_vfolder_rows_resolved(VFolderPermission.OWNER_PERM) -@check_api_params( - t.Dict({ - t.Key("new_name"): tx.Slug(allow_dot=True), - }) -) async def rename_vfolder( - request: web.Request, params: Any, row: Sequence[VFolderRow] + request: web.Request, + row: Sequence[VFolderRow], + params: RenameRequestModel, ) -> web.Response: root_ctx: RootContext = request.app["_root.context"] old_name = request.match_info["name"] @@ -1174,7 +1193,7 @@ async def rename_vfolder( user_role = request["user"]["role"] user_uuid = request["user"]["uuid"] resource_policy = request["keypair"]["resource_policy"] - new_name = params["new_name"] + new_name = params.new_name allowed_vfolder_types = await root_ctx.shared_config.get_vfolder_types() log.info( "VFOLDER.RENAME (email:{}, ak:{}, vf.old:{}, vf.new:{})", diff --git a/src/ai/backend/manager/models/alembic/versions/ef9a7960d234_extend_length_of_vfolders_name_column.py b/src/ai/backend/manager/models/alembic/versions/ef9a7960d234_extend_length_of_vfolders_name_column.py new file mode 100644 index 0000000000..5425164824 --- /dev/null +++ b/src/ai/backend/manager/models/alembic/versions/ef9a7960d234_extend_length_of_vfolders_name_column.py @@ -0,0 +1,36 @@ +"""extend length of vfolders.name column + +Revision ID: ef9a7960d234 +Revises: 0bb88d5a46bf +Create Date: 2025-01-03 16:07:11.407081 + +""" + +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision = "ef9a7960d234" +down_revision = "0bb88d5a46bf" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.alter_column( + "vfolders", + "name", + existing_type=sa.VARCHAR(length=64), + type_=sa.String(length=128), + existing_nullable=False, + ) + + +def downgrade() -> None: + op.alter_column( + "vfolders", + "name", + existing_type=sa.String(length=128), + type_=sa.VARCHAR(length=64), + existing_nullable=False, + ) diff --git a/src/ai/backend/manager/models/vfolder.py b/src/ai/backend/manager/models/vfolder.py index 34ec5adfd7..59ad0f41f6 100644 --- a/src/ai/backend/manager/models/vfolder.py +++ b/src/ai/backend/manager/models/vfolder.py @@ -34,8 +34,7 @@ from sqlalchemy.orm import joinedload, relationship, selectinload from ai.backend.common.bgtask import ProgressReporter -from ai.backend.common.config import model_definition_iv -from ai.backend.common.logging import BraceStyleAdapter +from ai.backend.common.defs import MODEL_VFOLDER_LENGTH_LIMIT from ai.backend.common.types import ( MountPermission, QuotaScopeID, @@ -312,7 +311,7 @@ class VFolderCloneInfo(NamedTuple): # host will be '' if vFolder is unmanaged sa.Column("host", sa.String(length=128), nullable=False, index=True), sa.Column("quota_scope_id", QuotaScopeIDType, nullable=False), - sa.Column("name", sa.String(length=64), nullable=False, index=True), + sa.Column("name", sa.String(length=MODEL_VFOLDER_LENGTH_LIMIT), nullable=False, index=True), sa.Column( "usage_mode", EnumValueType(VFolderUsageMode),