From d3e379205331ed54fca3699f4685bf8c1520d888 Mon Sep 17 00:00:00 2001 From: Ashok Singamaneni Date: Fri, 31 May 2024 09:32:06 -0500 Subject: [PATCH] Updating Databricks cli, bundles version to v0.220.0 (#127) --- brickflow/bundles/model.py | 2269 +++++++++++++++++++++++++++++++----- tests/engine/test_utils.py | 2 +- tools/modify_model.py | 10 +- 3 files changed, 1991 insertions(+), 290 deletions(-) diff --git a/brickflow/bundles/model.py b/brickflow/bundles/model.py index 309a39ee..d88c1bf6 100644 --- a/brickflow/bundles/model.py +++ b/brickflow/bundles/model.py @@ -1,18 +1,19 @@ -# generated with Databricks CLI Version: Databricks CLI v0.210.2 +# generated with Databricks CLI Version: Databricks CLI v0.220.0 # generated by datamodel-codegen: # filename: transformed_schema.json from __future__ import annotations -from typing import Any, Dict, List, Optional +from typing import Dict, List, Optional, Union -from pydantic import BaseModel, Extra, Field +from pydantic import BaseModel, Extra, Field, constr from typing_extensions import Literal class Files(BaseModel): class Config: extra = "forbid" + protected_namespaces = () source: str @@ -20,16 +21,58 @@ class Config: class Artifacts(BaseModel): class Config: extra = "forbid" + protected_namespaces = () build: Optional[str] = None + executable: Optional[str] = None files: Optional[List[Files]] = None path: Optional[str] = None type: str +class DeploymentLock(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + enabled: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = None + force: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = None + + +class Deployment(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + fail_on_active_runs: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = None + lock: DeploymentLock + + class Git(BaseModel): class Config: extra = "forbid" + protected_namespaces = () branch: Optional[str] = None origin_url: Optional[str] = None @@ -38,23 +81,43 @@ class Config: class Bundle(BaseModel): class Config: extra = "forbid" + protected_namespaces = () compute_id: Optional[str] = None + databricks_cli_version: Optional[str] = None + deployment: Optional[Deployment] = None git: Optional[Git] = None - name: str = Field(..., description='The name of the bundle.') + name: str class Experimental(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - python_wheel_wrapper: Optional[bool] = None + python_wheel_wrapper: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = None scripts: Optional[Dict[str, str]] = None + use_legacy_run_as: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = None class Permissions(BaseModel): class Config: extra = "forbid" + protected_namespaces = () group_name: Optional[str] = None level: str @@ -65,6 +128,7 @@ class Config: class ExperimentsPermissions(BaseModel): class Config: extra = "forbid" + protected_namespaces = () group_name: Optional[str] = None level: str @@ -75,6 +139,7 @@ class Config: class ExperimentsTags(BaseModel): class Config: extra = "forbid" + protected_namespaces = () key: Optional[str] = 
Field(None, description='The tag key.') value: Optional[str] = Field(None, description='The tag value.') @@ -83,15 +148,30 @@ class Config: class Experiments(BaseModel): class Config: extra = "forbid" + protected_namespaces = () artifact_location: Optional[str] = Field( None, description='Location where artifacts for the experiment are stored.' ) - creation_time: Optional[float] = Field(None, description='Creation time') + creation_time: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field(None, description='Creation time') experiment_id: Optional[str] = Field( None, description='Unique identifier for the experiment.' ) - last_update_time: Optional[float] = Field(None, description='Last update time') + last_update_time: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field(None, description='Last update time') lifecycle_stage: Optional[str] = Field( None, description='Current life cycle stage of the experiment: "active" or "deleted".\nDeleted experiments are not returned by APIs.', @@ -105,42 +185,25 @@ class Config: ) -class JobsComputeSpec(BaseModel): - class Config: - extra = "forbid" - - kind: Optional[str] = Field( - None, description='The kind of compute described by this compute specification.' - ) - - -class JobsCompute(BaseModel): - class Config: - extra = "forbid" - - compute_key: str = Field( - ..., - description='A unique name for the compute requirement. This field is required and must be unique within the job.\n`JobTaskSettings` may refer to this field to determine the compute requirements for the task execution.', - ) - spec: JobsComputeSpec - - class JobsContinuous(BaseModel): class Config: extra = "forbid" + protected_namespaces = () pause_status: Optional[str] = Field( - None, description='Indicate whether this schedule is paused or not.' + None, + description='Indicate whether the continuous execution of the job is paused or not. Defaults to UNPAUSED.', ) class JobsDeployment(BaseModel): class Config: extra = "forbid" + protected_namespaces = () kind: str = Field( ..., - description='The kind of deployment that manages the job.\n\n* `BUNDLE`: The job is managed by Databricks Asset Bundle.\n', + description='The kind of deployment that manages the job.\n\n* `BUNDLE`: The job is managed by Databricks Asset Bundle.', ) metadata_file_path: Optional[str] = Field( None, description='Path of the file that contains deployment metadata.' 
@@ -150,8 +213,16 @@ class Config: class JobsEmailNotifications(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - no_alert_for_skipped_runs: Optional[bool] = Field( + no_alert_for_skipped_runs: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='If true, do not send email to recipients specified in `on_failure` if the run is skipped.', ) @@ -173,9 +244,36 @@ class Config: ) +class JobsSpec(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + client: str = Field( + ..., + description='Client version used by the environment\nThe client is the user-facing environment of the runtime.\nEach client comes with a specific set of pre-installed libraries.\nThe version is a string, consisting of the major client version.', + ) + dependencies: Optional[List[str]] = Field( + None, + description='List of pip dependencies, as supported by the version of pip in this environment.\nEach dependency is a pip requirement file line https://pip.pypa.io/en/stable/reference/requirements-file-format/\nAllowed dependency could be , , (WSFS or Volumes in Databricks), \nE.g. dependencies: ["foo==0.0.1", "-r /Workspace/test/requirements.txt"]', + ) + + +class Jobs(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + environment_key: str = Field( + ..., description='The key of an environment. It has to be unique within a job.' + ) + spec: Optional[JobsSpec] = None + + class JobsGitSourceGitSnapshot(BaseModel): class Config: extra = "forbid" + protected_namespaces = () used_commit: Optional[str] = Field( None, @@ -186,10 +284,11 @@ class Config: class JobsGitSourceJobSource(BaseModel): class Config: extra = "forbid" + protected_namespaces = () dirty_state: Optional[str] = Field( None, - description='Dirty state indicates the job is not fully synced with the job specification in the remote repository.\n\nPossible values are:\n* `NOT_SYNCED`: The job is not yet synced with the remote job specification. Import the remote job specification from UI to make the job fully synced.\n* `DISCONNECTED`: The job is temporary disconnected from the remote job specification and is allowed for live edit. Import the remote job specification again from UI to make the job fully synced.\n', + description='Dirty state indicates the job is not fully synced with the job specification in the remote repository.\n\nPossible values are:\n* `NOT_SYNCED`: The job is not yet synced with the remote job specification. Import the remote job specification from UI to make the job fully synced.\n* `DISCONNECTED`: The job is temporary disconnected from the remote job specification and is allowed for live edit. Import the remote job specification again from UI to make the job fully synced.', ) import_from_git_branch: str = Field( ..., description='Name of the branch which the job is imported from.' 
@@ -203,6 +302,7 @@ class Config: class JobsGitSource(BaseModel): class Config: extra = "forbid" + protected_namespaces = () git_branch: Optional[str] = Field( None, @@ -230,11 +330,17 @@ class Config: class JobsHealthRules(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - metric: Optional[str] = None - op: Optional[str] = None - value: Optional[float] = Field( - None, + metric: str + op: str + value: Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] = Field( + ..., description='Specifies the threshold value that the health metric should obey to satisfy the health rule.', ) @@ -242,6 +348,7 @@ class Config: class JobsHealth(BaseModel): class Config: extra = "forbid" + protected_namespaces = () rules: Optional[List[JobsHealthRules]] = None @@ -249,13 +356,28 @@ class Config: class JobsJobClustersNewClusterAutoscale(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - max_workers: float = Field( - ..., + max_workers: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( + None, description='The maximum number of workers to which the cluster can scale up when overloaded.\nNote that `max_workers` must be strictly greater than `min_workers`.', ) - min_workers: float = Field( - ..., + min_workers: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( + None, description='The minimum number of workers to which the cluster can scale down when underutilized.\nIt is also the initial number of workers the cluster will have after creation.', ) @@ -263,22 +385,62 @@ class Config: class JobsJobClustersNewClusterAwsAttributes(BaseModel): class Config: extra = "forbid" + protected_namespaces = () availability: Optional[str] = None - ebs_volume_count: Optional[float] = Field( + ebs_volume_count: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='The number of volumes launched for each instance. Users can choose up to 10 volumes.\nThis feature is only enabled for supported node types. Legacy node types cannot specify\ncustom EBS volumes.\nFor node types with no instance store, at least one EBS volume needs to be specified;\notherwise, cluster creation will fail.\n\nThese EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc.\nInstance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc.\n\nIf EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for\nscratch storage because heterogenously sized scratch devices can lead to inefficient disk\nutilization. If no EBS volumes are attached, Databricks will configure Spark to use instance\nstore volumes.\n\nPlease note that if EBS volumes are specified, then the Spark configuration `spark.local.dir`\nwill be overridden.', ) - ebs_volume_iops: Optional[float] = Field(None, description='') - ebs_volume_size: Optional[float] = Field( + ebs_volume_iops: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( + None, + description='If using gp3 volumes, what IOPS to use for the disk. 
If this is not set, the maximum performance of a gp2 volume with the same volume size will be used.', + ) + ebs_volume_size: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='The size of each EBS volume (in GiB) launched for each instance. For general purpose\nSSD, this value must be within the range 100 - 4096. For throughput optimized HDD,\nthis value must be within the range 500 - 4096.', ) - ebs_volume_throughput: Optional[float] = Field( - None, description='' + ebs_volume_throughput: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( + None, + description='If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used.', ) ebs_volume_type: Optional[str] = None - first_on_demand: Optional[float] = Field( + first_on_demand: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nIf this value is greater than 0, the cluster driver node in particular will be placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster.', ) @@ -286,7 +448,14 @@ class Config: None, description='Nodes for this cluster will only be placed on AWS instances with this instance profile. If\nommitted, nodes will be placed on instances without an IAM instance profile. The instance\nprofile must have previously been added to the Databricks environment by an account\nadministrator.\n\nThis feature may only be available to certain customer plans.\n\nIf this field is ommitted, we will pull in the default from the conf if it exists.', ) - spot_bid_price_percent: Optional[float] = Field( + spot_bid_price_percent: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description="The bid price for AWS spot instances, as a percentage of the corresponding instance type's\non-demand price.\nFor example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot\ninstance, then the bid price is half of the price of\non-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice\nthe price of on-demand `r3.xlarge` instances. 
If not specified, the default value is 100.\nWhen spot instances are requested for this cluster, only spot instances whose bid price\npercentage matches this field will be considered.\nNote that, for safety, we enforce this field to be no more than 10000.\n\nThe default value and documentation here should be kept consistent with\nCommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent.", ) @@ -299,6 +468,7 @@ class Config: class JobsJobClustersNewClusterAzureAttributesLogAnalyticsInfo(BaseModel): class Config: extra = "forbid" + protected_namespaces = () log_analytics_primary_key: Optional[str] = Field( None, description='' @@ -311,16 +481,31 @@ class Config: class JobsJobClustersNewClusterAzureAttributes(BaseModel): class Config: extra = "forbid" + protected_namespaces = () availability: Optional[str] = None - first_on_demand: Optional[float] = Field( + first_on_demand: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nThis value should be greater than 0, to make sure the cluster driver node is placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster.', ) log_analytics_info: Optional[ JobsJobClustersNewClusterAzureAttributesLogAnalyticsInfo ] = None - spot_bid_max_price: Optional[float] = Field( + spot_bid_max_price: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='The max bid price to be used for Azure spot instances.\nThe Max price for the bid cannot be higher than the on-demand price of the instance.\nIf not specified, the default value is -1, which specifies that the instance cannot be evicted\non the basis of price, and only on the basis of availability. Further, the value should > 0 or -1.', ) @@ -329,25 +514,32 @@ class Config: class JobsJobClustersNewClusterClusterLogConfDbfs(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - destination: Optional[str] = Field( - None, description='dbfs destination, e.g. `dbfs:/my/path`' - ) + destination: str = Field(..., description='dbfs destination, e.g. `dbfs:/my/path`') class JobsJobClustersNewClusterClusterLogConfS(BaseModel): class Config: extra = "forbid" + protected_namespaces = () canned_acl: Optional[str] = Field( None, description='(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. 
If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs.', ) - destination: Optional[str] = Field( - None, + destination: str = Field( + ..., description='S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs.', ) - enable_encryption: Optional[bool] = Field( + enable_encryption: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='(Optional) Flag to enable server side encryption, `false` by default.', ) @@ -372,6 +564,7 @@ class Config: class JobsJobClustersNewClusterClusterLogConf(BaseModel): class Config: extra = "forbid" + protected_namespaces = () dbfs: Optional[JobsJobClustersNewClusterClusterLogConfDbfs] = None s3: Optional[JobsJobClustersNewClusterClusterLogConfS] = None @@ -380,6 +573,7 @@ class Config: class JobsJobClustersNewClusterDockerImageBasicAuth(BaseModel): class Config: extra = "forbid" + protected_namespaces = () password: Optional[str] = Field(None, description='Password of the user') username: Optional[str] = Field(None, description='Name of the user') @@ -388,6 +582,7 @@ class Config: class JobsJobClustersNewClusterDockerImage(BaseModel): class Config: extra = "forbid" + protected_namespaces = () basic_auth: Optional[JobsJobClustersNewClusterDockerImageBasicAuth] = None url: Optional[str] = Field(None, description='URL of the docker image.') @@ -396,50 +591,109 @@ class Config: class JobsJobClustersNewClusterGcpAttributes(BaseModel): class Config: extra = "forbid" + protected_namespaces = () availability: Optional[str] = None - boot_disk_size: Optional[float] = Field(None, description='boot disk size in GB') + boot_disk_size: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field(None, description='boot disk size in GB') google_service_account: Optional[str] = Field( None, description='If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator.', ) - local_ssd_count: Optional[float] = Field( + local_ssd_count: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. 
Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type.', ) + use_preemptible_executors: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( + None, + description='This field determines whether the spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default).\nNote: Soon to be deprecated, use the availability field instead.', + ) + zone_id: Optional[str] = Field( + None, + description='Identifier for the availability zone in which the cluster resides.\nThis can be one of the following:\n- "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default]\n- "AUTO" => Databricks picks an availability zone to schedule the cluster on.\n- A GCP availability zone => Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones.', + ) -class JobsJobClustersNewClusterInitScriptsDbfs(BaseModel): +class JobsJobClustersNewClusterInitScriptsAbfss(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - destination: Optional[str] = Field( - None, description='dbfs destination, e.g. `dbfs:/my/path`' + destination: str = Field( + ..., + description='abfss destination, e.g. `abfss://@.dfs.core.windows.net/`.', ) +class JobsJobClustersNewClusterInitScriptsDbfs(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + destination: str = Field(..., description='dbfs destination, e.g. `dbfs:/my/path`') + + class JobsJobClustersNewClusterInitScriptsFile(BaseModel): class Config: extra = "forbid" + protected_namespaces = () + + destination: str = Field( + ..., description='local file destination, e.g. `file:/my/local/file.sh`' + ) + + +class JobsJobClustersNewClusterInitScriptsGcs(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () - destination: Optional[str] = Field( - None, description='local file destination, e.g. `file:/my/local/file.sh`' + destination: str = Field( + ..., description='GCS destination/URI, e.g. `gs://my-bucket/some-prefix`' ) class JobsJobClustersNewClusterInitScriptsS(BaseModel): class Config: extra = "forbid" + protected_namespaces = () canned_acl: Optional[str] = Field( None, description='(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs.', ) - destination: Optional[str] = Field( - None, + destination: str = Field( + ..., description='S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. 
Please also note that you cannot use AWS keys to deliver logs.', ) - enable_encryption: Optional[bool] = Field( + enable_encryption: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='(Optional) Flag to enable server side encryption, `false` by default.', ) @@ -464,9 +718,10 @@ class Config: class JobsJobClustersNewClusterInitScriptsVolumes(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - destination: Optional[str] = Field( - None, + destination: str = Field( + ..., description='Unity Catalog Volumes file destination, e.g. `/Volumes/my-init.sh`', ) @@ -474,9 +729,10 @@ class Config: class JobsJobClustersNewClusterInitScriptsWorkspace(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - destination: Optional[str] = Field( - None, + destination: str = Field( + ..., description='workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh`', ) @@ -484,9 +740,12 @@ class Config: class JobsJobClustersNewClusterInitScripts(BaseModel): class Config: extra = "forbid" + protected_namespaces = () + abfss: Optional[JobsJobClustersNewClusterInitScriptsAbfss] = None dbfs: Optional[JobsJobClustersNewClusterInitScriptsDbfs] = None file: Optional[JobsJobClustersNewClusterInitScriptsFile] = None + gcs: Optional[JobsJobClustersNewClusterInitScriptsGcs] = None s3: Optional[JobsJobClustersNewClusterInitScriptsS] = None volumes: Optional[JobsJobClustersNewClusterInitScriptsVolumes] = None workspace: Optional[JobsJobClustersNewClusterInitScriptsWorkspace] = None @@ -495,11 +754,24 @@ class Config: class JobsJobClustersNewClusterWorkloadTypeClients(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - jobs: Optional[bool] = Field( - None, description='With jobs set, the cluster can be used for jobs' - ) - notebooks: Optional[bool] = Field( + jobs: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field(None, description='With jobs set, the cluster can be used for jobs') + notebooks: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='With notebooks set, this cluster can be used for notebooks' ) @@ -507,17 +779,36 @@ class Config: class JobsJobClustersNewClusterWorkloadType(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - clients: Optional[JobsJobClustersNewClusterWorkloadTypeClients] = None + clients: JobsJobClustersNewClusterWorkloadTypeClients class JobsJobClustersNewCluster(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - apply_policy_default_values: Optional[bool] = None + apply_policy_default_values: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( + None, + description='When set to true, fixed and default values from the policy will be used for fields that are omitted. 
When set to false, only fixed values from the policy will be applied.', + ) autoscale: Optional[JobsJobClustersNewClusterAutoscale] = None - autotermination_minutes: Optional[float] = Field( + autotermination_minutes: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='Automatically terminates the cluster after it is inactive for this time in minutes. If not set,\nthis cluster will not be automatically terminated. If specified, the threshold must be between\n10 and 10000 minutes.\nUsers can also set this value to 0 to explicitly disable automatic termination.', ) @@ -528,7 +819,6 @@ class Config: None, description="Cluster name requested by the user. This doesn't have to be unique.\nIf not specified at creation, the cluster name will be an empty string.\n", ) - cluster_source: Optional[str] = None custom_tags: Optional[Dict[str, str]] = None data_security_mode: Optional[ Literal[ @@ -553,13 +843,25 @@ class Config: None, description='The node type of the Spark driver. Note that this field is optional;\nif unset, the driver node type will be set as the same value\nas `node_type_id` defined above.\n', ) - enable_elastic_disk: Optional[bool] = Field( + enable_elastic_disk: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk\nspace when its Spark workers are running low on disk space. This feature requires specific AWS\npermissions to function correctly - refer to the User Guide for more details.', ) - enable_local_disk_encryption: Optional[bool] = Field( - None, description="Whether to enable LUKS on cluster VMs' local disks" - ) + enable_local_disk_encryption: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field(None, description="Whether to enable LUKS on cluster VMs' local disks") gcp_attributes: Optional[JobsJobClustersNewClusterGcpAttributes] = None init_scripts: Optional[List[JobsJobClustersNewClusterInitScripts]] = Field( None, @@ -573,7 +875,14 @@ class Config: None, description='This field encodes, through a single value, the resources available to each of\nthe Spark nodes in this cluster. For example, the Spark nodes can be provisioned\nand optimized for memory or compute intensive workloads. A list of available node\ntypes can be retrieved by using the :method:clusters/listNodeTypes API call.\n', ) - num_workers: Optional[float] = Field( + num_workers: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='Number of worker nodes that this cluster should have. A cluster has one Spark Driver\nand `num_workers` Executors for a total of `num_workers` + 1 Spark nodes.\n\nNote: When reading the properties of a cluster, this field reflects the desired number\nof workers rather than the actual current number of workers. 
For instance, if a cluster\nis resized from 5 to 10 workers, this field will immediately be updated to reflect\nthe target size of 10 workers, whereas the workers listed in `spark_info` will gradually\nincrease from 5 to 10 as the new nodes are provisioned.', ) @@ -601,23 +910,39 @@ class Config: class JobsJobClusters(BaseModel): class Config: extra = "forbid" + protected_namespaces = () job_cluster_key: str = Field( ..., description='A unique name for the job cluster. This field is required and must be unique within the job.\n`JobTaskSettings` may refer to this field to determine which cluster to launch for the task execution.', ) - new_cluster: Optional[JobsJobClustersNewCluster] = None + new_cluster: JobsJobClustersNewCluster class JobsNotificationSettings(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - no_alert_for_canceled_runs: Optional[bool] = Field( + no_alert_for_canceled_runs: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='If true, do not send notifications to recipients specified in `on_failure` if the run is canceled.', ) - no_alert_for_skipped_runs: Optional[bool] = Field( + no_alert_for_skipped_runs: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='If true, do not send notifications to recipients specified in `on_failure` if the run is skipped.', ) @@ -626,6 +951,7 @@ class Config: class JobsParameters(BaseModel): class Config: extra = "forbid" + protected_namespaces = () default: str = Field(..., description='Default value of the parameter.') name: str = Field( @@ -637,6 +963,7 @@ class Config: class JobsPermissions(BaseModel): class Config: extra = "forbid" + protected_namespaces = () group_name: Optional[str] = None level: str @@ -647,8 +974,14 @@ class Config: class JobsQueue(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - enabled: bool = Field( + enabled: Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] = Field( ..., description='If true, enable queueing for the job. This is a required field.', ) @@ -657,6 +990,7 @@ class Config: class JobsRunAs(BaseModel): class Config: extra = "forbid" + protected_namespaces = () service_principal_name: Optional[str] = Field( None, @@ -671,34 +1005,36 @@ class Config: class JobsSchedule(BaseModel): class Config: extra = "forbid" + protected_namespaces = () pause_status: Optional[str] = Field( None, description='Indicate whether this schedule is paused or not.' ) quartz_cron_expression: str = Field( ..., - description='A Cron expression using Quartz syntax that describes the schedule for a job.\nSee [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html)\nfor details. This field is required."\n', + description='A Cron expression using Quartz syntax that describes the schedule for a job. See [Cron Trigger](http://www.quartz-scheduler.org/documentation/quartz-2.3.0/tutorials/crontrigger.html) for details. This field is required.', ) timezone_id: str = Field( ..., - description='A Java timezone ID. The schedule for a job is resolved with respect to this timezone.\nSee [Java TimeZone](https://docs.oracle.com/javase/7/docs/api/java/util/TimeZone.html) for details.\nThis field is required.\n', + description='A Java timezone ID. 
The schedule for a job is resolved with respect to this timezone. See [Java TimeZone](https://docs.oracle.com/javase/7/docs/api/java/util/TimeZone.html) for details. This field is required.', ) class JobsTasksConditionTask(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - left: Optional[str] = Field( - None, + left: str = Field( + ..., description='The left operand of the condition task. Can be either a string value or a job state or parameter reference.', ) - op: Optional[str] = Field( - None, - description='* `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `“12.0” == “12”` will evaluate to `false`.\n* `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. `“12.0” >= “12”` will evaluate to `true`, `“10.0” >= “12”` will evaluate to `false`.\n\nThe boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison.\n', + op: str = Field( + ..., + description='* `EQUAL_TO`, `NOT_EQUAL` operators perform string comparison of their operands. This means that `“12.0” == “12”` will evaluate to `false`.\n* `GREATER_THAN`, `GREATER_THAN_OR_EQUAL`, `LESS_THAN`, `LESS_THAN_OR_EQUAL` operators perform numeric comparison of their operands. `“12.0” >= “12”` will evaluate to `true`, `“10.0” >= “12”` will evaluate to `false`.\n\nThe boolean comparison to task values can be implemented with operators `EQUAL_TO`, `NOT_EQUAL`. If a task value was set to a boolean value, it will be serialized to `“true”` or `“false”` for the comparison.', ) - right: Optional[str] = Field( - None, + right: str = Field( + ..., description='The right operand of the condition task. Can be either a string value or a job state or parameter reference.', ) @@ -706,6 +1042,7 @@ class Config: class JobsTasksDbtTask(BaseModel): class Config: extra = "forbid" + protected_namespaces = () catalog: Optional[str] = Field( None, @@ -721,13 +1058,17 @@ class Config: ) project_directory: Optional[str] = Field( None, - description='Optional (relative) path to the project directory, if no value is provided, the root of the git repository is used.', + description='Path to the project directory. Optional for Git sourced tasks, in which\ncase if no value is provided, the root of the Git repository is used.', ) schema_: Optional[str] = Field( None, alias='schema', description='Optional schema to write to. This parameter is only used when a warehouse_id is also provided. If not provided, the `default` schema is used.', ) + source: Optional[str] = Field( + None, + description='Optional location type of the project directory. When set to `WORKSPACE`, the project will be retrieved\nfrom the local Databricks workspace. When set to `GIT`, the project will be retrieved from a Git repository\ndefined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n\n* `WORKSPACE`: Project is located in Databricks workspace.\n* `GIT`: Project is located in cloud Git provider.', + ) warehouse_id: Optional[str] = Field( None, description='ID of the SQL warehouse to connect to. If provided, we automatically generate and provide the profile and connection details to dbt. 
It can be overridden on a per-command basis by using the `--profiles-dir` command line argument.', @@ -737,6 +1078,7 @@ class Config: class JobsTasksDependsOn(BaseModel): class Config: extra = "forbid" + protected_namespaces = () outcome: Optional[str] = Field( None, @@ -748,7 +1090,19 @@ class Config: class JobsTasksEmailNotifications(BaseModel): class Config: extra = "forbid" + protected_namespaces = () + no_alert_for_skipped_runs: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( + None, + description='If true, do not send email to recipients specified in `on_failure` if the run is skipped.', + ) on_duration_warning_threshold_exceeded: Optional[List[str]] = Field( None, description='A list of email addresses to be notified when the duration of a run exceeds the threshold specified for the `RUN_DURATION_SECONDS` metric in the `health` field. If no rule for the `RUN_DURATION_SECONDS` metric is specified in the `health` field for the job, notifications are not sent.', @@ -767,14 +1121,24 @@ class Config: ) +class JobsTasksForEachTask(BaseModel): + pass + + class JobsTasksHealthRules(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - metric: Optional[str] = None - op: Optional[str] = None - value: Optional[float] = Field( - None, + metric: str + op: str + value: Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] = Field( + ..., description='Specifies the threshold value that the health metric should obey to satisfy the health rule.', ) @@ -782,6 +1146,7 @@ class Config: class JobsTasksHealth(BaseModel): class Config: extra = "forbid" + protected_namespaces = () rules: Optional[List[JobsTasksHealthRules]] = None @@ -789,6 +1154,7 @@ class Config: class JobsTasksLibrariesCran(BaseModel): class Config: extra = "forbid" + protected_namespaces = () package: str = Field(..., description='The name of the CRAN package to install.') repo: Optional[str] = Field( @@ -800,6 +1166,7 @@ class Config: class JobsTasksLibrariesMaven(BaseModel): class Config: extra = "forbid" + protected_namespaces = () coordinates: str = Field( ..., @@ -818,6 +1185,7 @@ class Config: class JobsTasksLibrariesPypi(BaseModel): class Config: extra = "forbid" + protected_namespaces = () package: str = Field( ..., @@ -832,34 +1200,54 @@ class Config: class JobsTasksLibraries(BaseModel): class Config: extra = "forbid" + protected_namespaces = () cran: Optional[JobsTasksLibrariesCran] = None egg: Optional[str] = Field( None, - description='URI of the egg to be installed. Currently only DBFS and S3 URIs are supported.\nFor example: `{ "egg": "dbfs:/my/egg" }` or\n`{ "egg": "s3://my-bucket/egg" }`.\nIf S3 is used, please make sure the cluster has read access on the library. You may need to\nlaunch the cluster with an IAM role to access the S3 URI.', + description='URI of the egg library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs.\nFor example: `{ "egg": "/Workspace/path/to/library.egg" }`, `{ "egg" : "/Volumes/path/to/library.egg" }` or\n`{ "egg": "s3://my-bucket/library.egg" }`.\nIf S3 is used, please make sure the cluster has read access on the library. You may need to\nlaunch the cluster with an IAM role to access the S3 URI.', ) jar: Optional[str] = Field( None, - description='URI of the jar to be installed. 
Currently only DBFS and S3 URIs are supported.\nFor example: `{ "jar": "dbfs:/mnt/databricks/library.jar" }` or\n`{ "jar": "s3://my-bucket/library.jar" }`.\nIf S3 is used, please make sure the cluster has read access on the library. You may need to\nlaunch the cluster with an IAM role to access the S3 URI.', + description='URI of the JAR library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs.\nFor example: `{ "jar": "/Workspace/path/to/library.jar" }`, `{ "jar" : "/Volumes/path/to/library.jar" }` or\n`{ "jar": "s3://my-bucket/library.jar" }`.\nIf S3 is used, please make sure the cluster has read access on the library. You may need to\nlaunch the cluster with an IAM role to access the S3 URI.', ) maven: Optional[JobsTasksLibrariesMaven] = None pypi: Optional[JobsTasksLibrariesPypi] = None + requirements: Optional[str] = Field( + None, + description='URI of the requirements.txt file to install. Only Workspace paths and Unity Catalog Volumes paths are supported.\nFor example: `{ "requirements": "/Workspace/path/to/requirements.txt" }` or `{ "requirements" : "/Volumes/path/to/requirements.txt" }`', + ) whl: Optional[str] = Field( None, - description='URI of the wheel to be installed.\nFor example: `{ "whl": "dbfs:/my/whl" }` or `{ "whl": "s3://my-bucket/whl" }`.\nIf S3 is used, please make sure the cluster has read access on the library. You may need to\nlaunch the cluster with an IAM role to access the S3 URI.', + description='URI of the wheel library to install. Supported URIs include Workspace paths, Unity Catalog Volumes paths, and S3 URIs.\nFor example: `{ "whl": "/Workspace/path/to/library.whl" }`, `{ "whl" : "/Volumes/path/to/library.whl" }` or\n`{ "whl": "s3://my-bucket/library.whl" }`.\nIf S3 is used, please make sure the cluster has read access on the library. You may need to\nlaunch the cluster with an IAM role to access the S3 URI.', ) class JobsTasksNewClusterAutoscale(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - max_workers: float = Field( - ..., + max_workers: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( + None, description='The maximum number of workers to which the cluster can scale up when overloaded.\nNote that `max_workers` must be strictly greater than `min_workers`.', ) - min_workers: float = Field( - ..., + min_workers: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( + None, description='The minimum number of workers to which the cluster can scale down when underutilized.\nIt is also the initial number of workers the cluster will have after creation.', ) @@ -867,22 +1255,62 @@ class Config: class JobsTasksNewClusterAwsAttributes(BaseModel): class Config: extra = "forbid" + protected_namespaces = () availability: Optional[str] = None - ebs_volume_count: Optional[float] = Field( + ebs_volume_count: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='The number of volumes launched for each instance. Users can choose up to 10 volumes.\nThis feature is only enabled for supported node types. 
Legacy node types cannot specify\ncustom EBS volumes.\nFor node types with no instance store, at least one EBS volume needs to be specified;\notherwise, cluster creation will fail.\n\nThese EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc.\nInstance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc.\n\nIf EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for\nscratch storage because heterogenously sized scratch devices can lead to inefficient disk\nutilization. If no EBS volumes are attached, Databricks will configure Spark to use instance\nstore volumes.\n\nPlease note that if EBS volumes are specified, then the Spark configuration `spark.local.dir`\nwill be overridden.', ) - ebs_volume_iops: Optional[float] = Field(None, description='') - ebs_volume_size: Optional[float] = Field( + ebs_volume_iops: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( + None, + description='If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used.', + ) + ebs_volume_size: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='The size of each EBS volume (in GiB) launched for each instance. For general purpose\nSSD, this value must be within the range 100 - 4096. For throughput optimized HDD,\nthis value must be within the range 500 - 4096.', ) - ebs_volume_throughput: Optional[float] = Field( - None, description='' + ebs_volume_throughput: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( + None, + description='If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used.', ) ebs_volume_type: Optional[str] = None - first_on_demand: Optional[float] = Field( + first_on_demand: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nIf this value is greater than 0, the cluster driver node in particular will be placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster.', ) @@ -890,7 +1318,14 @@ class Config: None, description='Nodes for this cluster will only be placed on AWS instances with this instance profile. If\nommitted, nodes will be placed on instances without an IAM instance profile. 
The instance\nprofile must have previously been added to the Databricks environment by an account\nadministrator.\n\nThis feature may only be available to certain customer plans.\n\nIf this field is ommitted, we will pull in the default from the conf if it exists.', ) - spot_bid_price_percent: Optional[float] = Field( + spot_bid_price_percent: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description="The bid price for AWS spot instances, as a percentage of the corresponding instance type's\non-demand price.\nFor example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot\ninstance, then the bid price is half of the price of\non-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice\nthe price of on-demand `r3.xlarge` instances. If not specified, the default value is 100.\nWhen spot instances are requested for this cluster, only spot instances whose bid price\npercentage matches this field will be considered.\nNote that, for safety, we enforce this field to be no more than 10000.\n\nThe default value and documentation here should be kept consistent with\nCommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent.", ) @@ -903,6 +1338,7 @@ class Config: class JobsTasksNewClusterAzureAttributesLogAnalyticsInfo(BaseModel): class Config: extra = "forbid" + protected_namespaces = () log_analytics_primary_key: Optional[str] = Field( None, description='' @@ -915,16 +1351,31 @@ class Config: class JobsTasksNewClusterAzureAttributes(BaseModel): class Config: extra = "forbid" + protected_namespaces = () availability: Optional[str] = None - first_on_demand: Optional[float] = Field( + first_on_demand: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nThis value should be greater than 0, to make sure the cluster driver node is placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster.', ) log_analytics_info: Optional[ JobsTasksNewClusterAzureAttributesLogAnalyticsInfo ] = None - spot_bid_max_price: Optional[float] = Field( + spot_bid_max_price: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='The max bid price to be used for Azure spot instances.\nThe Max price for the bid cannot be higher than the on-demand price of the instance.\nIf not specified, the default value is -1, which specifies that the instance cannot be evicted\non the basis of price, and only on the basis of availability. Further, the value should > 0 or -1.', ) @@ -933,25 +1384,32 @@ class Config: class JobsTasksNewClusterClusterLogConfDbfs(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - destination: Optional[str] = Field( - None, description='dbfs destination, e.g. 
`dbfs:/my/path`' - ) + destination: str = Field(..., description='dbfs destination, e.g. `dbfs:/my/path`') class JobsTasksNewClusterClusterLogConfS(BaseModel): class Config: extra = "forbid" + protected_namespaces = () canned_acl: Optional[str] = Field( None, description='(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs.', ) - destination: Optional[str] = Field( - None, + destination: str = Field( + ..., description='S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs.', ) - enable_encryption: Optional[bool] = Field( + enable_encryption: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='(Optional) Flag to enable server side encryption, `false` by default.', ) @@ -976,6 +1434,7 @@ class Config: class JobsTasksNewClusterClusterLogConf(BaseModel): class Config: extra = "forbid" + protected_namespaces = () dbfs: Optional[JobsTasksNewClusterClusterLogConfDbfs] = None s3: Optional[JobsTasksNewClusterClusterLogConfS] = None @@ -984,6 +1443,7 @@ class Config: class JobsTasksNewClusterDockerImageBasicAuth(BaseModel): class Config: extra = "forbid" + protected_namespaces = () password: Optional[str] = Field(None, description='Password of the user') username: Optional[str] = Field(None, description='Name of the user') @@ -992,6 +1452,7 @@ class Config: class JobsTasksNewClusterDockerImage(BaseModel): class Config: extra = "forbid" + protected_namespaces = () basic_auth: Optional[JobsTasksNewClusterDockerImageBasicAuth] = None url: Optional[str] = Field(None, description='URL of the docker image.') @@ -1000,50 +1461,109 @@ class Config: class JobsTasksNewClusterGcpAttributes(BaseModel): class Config: extra = "forbid" + protected_namespaces = () availability: Optional[str] = None - boot_disk_size: Optional[float] = Field(None, description='boot disk size in GB') + boot_disk_size: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field(None, description='boot disk size in GB') google_service_account: Optional[str] = Field( None, description='If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator.', ) - local_ssd_count: Optional[float] = Field( + local_ssd_count: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. 
Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type.', ) + use_preemptible_executors: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( + None, + description='This field determines whether the spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default).\nNote: Soon to be deprecated, use the availability field instead.', + ) + zone_id: Optional[str] = Field( + None, + description='Identifier for the availability zone in which the cluster resides.\nThis can be one of the following:\n- "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default]\n- "AUTO" => Databricks picks an availability zone to schedule the cluster on.\n- A GCP availability zone => Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones.', + ) -class JobsTasksNewClusterInitScriptsDbfs(BaseModel): +class JobsTasksNewClusterInitScriptsAbfss(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - destination: Optional[str] = Field( - None, description='dbfs destination, e.g. `dbfs:/my/path`' + destination: str = Field( + ..., + description='abfss destination, e.g. `abfss://@.dfs.core.windows.net/`.', ) +class JobsTasksNewClusterInitScriptsDbfs(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + destination: str = Field(..., description='dbfs destination, e.g. `dbfs:/my/path`') + + class JobsTasksNewClusterInitScriptsFile(BaseModel): class Config: extra = "forbid" + protected_namespaces = () + + destination: str = Field( + ..., description='local file destination, e.g. `file:/my/local/file.sh`' + ) + + +class JobsTasksNewClusterInitScriptsGcs(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () - destination: Optional[str] = Field( - None, description='local file destination, e.g. `file:/my/local/file.sh`' + destination: str = Field( + ..., description='GCS destination/URI, e.g. `gs://my-bucket/some-prefix`' ) class JobsTasksNewClusterInitScriptsS(BaseModel): class Config: extra = "forbid" + protected_namespaces = () canned_acl: Optional[str] = Field( None, description='(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs.', ) - destination: Optional[str] = Field( - None, + destination: str = Field( + ..., description='S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. 
Please also note that you cannot use AWS keys to deliver logs.', ) - enable_encryption: Optional[bool] = Field( + enable_encryption: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='(Optional) Flag to enable server side encryption, `false` by default.', ) @@ -1068,9 +1588,10 @@ class Config: class JobsTasksNewClusterInitScriptsVolumes(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - destination: Optional[str] = Field( - None, + destination: str = Field( + ..., description='Unity Catalog Volumes file destination, e.g. `/Volumes/my-init.sh`', ) @@ -1078,9 +1599,10 @@ class Config: class JobsTasksNewClusterInitScriptsWorkspace(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - destination: Optional[str] = Field( - None, + destination: str = Field( + ..., description='workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh`', ) @@ -1088,9 +1610,12 @@ class Config: class JobsTasksNewClusterInitScripts(BaseModel): class Config: extra = "forbid" + protected_namespaces = () + abfss: Optional[JobsTasksNewClusterInitScriptsAbfss] = None dbfs: Optional[JobsTasksNewClusterInitScriptsDbfs] = None file: Optional[JobsTasksNewClusterInitScriptsFile] = None + gcs: Optional[JobsTasksNewClusterInitScriptsGcs] = None s3: Optional[JobsTasksNewClusterInitScriptsS] = None volumes: Optional[JobsTasksNewClusterInitScriptsVolumes] = None workspace: Optional[JobsTasksNewClusterInitScriptsWorkspace] = None @@ -1099,11 +1624,24 @@ class Config: class JobsTasksNewClusterWorkloadTypeClients(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - jobs: Optional[bool] = Field( - None, description='With jobs set, the cluster can be used for jobs' - ) - notebooks: Optional[bool] = Field( + jobs: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field(None, description='With jobs set, the cluster can be used for jobs') + notebooks: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='With notebooks set, this cluster can be used for notebooks' ) @@ -1111,17 +1649,36 @@ class Config: class JobsTasksNewClusterWorkloadType(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - clients: Optional[JobsTasksNewClusterWorkloadTypeClients] = None + clients: JobsTasksNewClusterWorkloadTypeClients class JobsTasksNewCluster(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - apply_policy_default_values: Optional[bool] = None + apply_policy_default_values: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( + None, + description='When set to true, fixed and default values from the policy will be used for fields that are omitted. When set to false, only fixed values from the policy will be applied.', + ) autoscale: Optional[JobsTasksNewClusterAutoscale] = None - autotermination_minutes: Optional[float] = Field( + autotermination_minutes: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='Automatically terminates the cluster after it is inactive for this time in minutes. 
If not set,\nthis cluster will not be automatically terminated. If specified, the threshold must be between\n10 and 10000 minutes.\nUsers can also set this value to 0 to explicitly disable automatic termination.', ) @@ -1132,7 +1689,6 @@ class Config: None, description="Cluster name requested by the user. This doesn't have to be unique.\nIf not specified at creation, the cluster name will be an empty string.\n", ) - cluster_source: Optional[str] = None custom_tags: Optional[Dict[str, str]] = None data_security_mode: Optional[ Literal[ @@ -1157,13 +1713,25 @@ class Config: None, description='The node type of the Spark driver. Note that this field is optional;\nif unset, the driver node type will be set as the same value\nas `node_type_id` defined above.\n', ) - enable_elastic_disk: Optional[bool] = Field( + enable_elastic_disk: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='Autoscaling Local Storage: when enabled, this cluster will dynamically acquire additional disk\nspace when its Spark workers are running low on disk space. This feature requires specific AWS\npermissions to function correctly - refer to the User Guide for more details.', ) - enable_local_disk_encryption: Optional[bool] = Field( - None, description="Whether to enable LUKS on cluster VMs' local disks" - ) + enable_local_disk_encryption: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field(None, description="Whether to enable LUKS on cluster VMs' local disks") gcp_attributes: Optional[JobsTasksNewClusterGcpAttributes] = None init_scripts: Optional[List[JobsTasksNewClusterInitScripts]] = Field( None, @@ -1177,7 +1745,14 @@ class Config: None, description='This field encodes, through a single value, the resources available to each of\nthe Spark nodes in this cluster. For example, the Spark nodes can be provisioned\nand optimized for memory or compute intensive workloads. A list of available node\ntypes can be retrieved by using the :method:clusters/listNodeTypes API call.\n', ) - num_workers: Optional[float] = Field( + num_workers: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='Number of worker nodes that this cluster should have. A cluster has one Spark Driver\nand `num_workers` Executors for a total of `num_workers` + 1 Spark nodes.\n\nNote: When reading the properties of a cluster, this field reflects the desired number\nof workers rather than the actual current number of workers. For instance, if a cluster\nis resized from 5 to 10 workers, this field will immediately be updated to reflect\nthe target size of 10 workers, whereas the workers listed in `spark_info` will gradually\nincrease from 5 to 10 as the new nodes are provisioned.', ) @@ -1205,31 +1780,58 @@ class Config: class JobsTasksNotebookTask(BaseModel): class Config: extra = "forbid" + protected_namespaces = () base_parameters: Optional[Dict[str, str]] = None notebook_path: str = Field( ..., - description='The path of the notebook to be run in the Databricks workspace or remote repository.\nFor notebooks stored in the Databricks workspace, the path must be absolute and begin with a slash.\nFor notebooks stored in a remote repository, the path must be relative. 
This field is required.\n', + description='The path of the notebook to be run in the Databricks workspace or remote repository.\nFor notebooks stored in the Databricks workspace, the path must be absolute and begin with a slash.\nFor notebooks stored in a remote repository, the path must be relative. This field is required.', ) source: Optional[str] = Field( None, - description='Optional location type of the Python file. When set to `WORKSPACE` or not specified, the file will be retrieved\nfrom the local workspace or cloud location (if the `python_file` has a URI format). When set to `GIT`,\nthe Python file will be retrieved from a Git repository defined in `git_source`.\n\n* `WORKSPACE`: The Python file is located in a workspace or at a cloud filesystem URI.\n* `GIT`: The Python file is located in a remote Git repository.\n', + description='Optional location type of the notebook. When set to `WORKSPACE`, the notebook will be retrieved from the local Databricks workspace. When set to `GIT`, the notebook will be retrieved from a Git repository\ndefined in `git_source`. If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n* `WORKSPACE`: Notebook is located in Databricks workspace.\n* `GIT`: Notebook is located in cloud Git provider.', + ) + warehouse_id: Optional[str] = Field( + None, + description='Optional `warehouse_id` to run the notebook on a SQL warehouse. Classic SQL warehouses are NOT supported, please use serverless or pro SQL warehouses.\n\nNote that SQL warehouses only support SQL cells; if the notebook contains non-SQL cells, the run will fail.', ) class JobsTasksNotificationSettings(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - alert_on_last_attempt: Optional[bool] = Field( + alert_on_last_attempt: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='If true, do not send notifications to recipients specified in `on_start` for the retried runs and do not send notifications to recipients specified in `on_failure` until the last retry of the run.', ) - no_alert_for_canceled_runs: Optional[bool] = Field( + no_alert_for_canceled_runs: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='If true, do not send notifications to recipients specified in `on_failure` if the run is canceled.', ) - no_alert_for_skipped_runs: Optional[bool] = Field( + no_alert_for_skipped_runs: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='If true, do not send notifications to recipients specified in `on_failure` if the run is skipped.', ) @@ -1238,49 +1840,99 @@ class Config: class JobsTasksPipelineTask(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - full_refresh: Optional[bool] = Field( - None, - description='If true, a full refresh will be triggered on the delta live table.', + full_refresh: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( + None, description='If true, triggers a full refresh on the delta live table.' ) - pipeline_id: Optional[str] = Field( - None, description='The full name of the pipeline task to execute.' 
+ pipeline_id: str = Field( + ..., description='The full name of the pipeline task to execute.' ) class JobsTasksPythonWheelTask(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - entry_point: Optional[str] = Field( - None, + entry_point: str = Field( + ..., description='Named entry point to use, if it does not exist in the metadata of the package it executes the function from the package directly using `$packageName.$entryPoint()`', ) named_parameters: Optional[Dict[str, str]] = None - package_name: Optional[str] = Field( - None, description='Name of the package to execute' - ) + package_name: str = Field(..., description='Name of the package to execute') parameters: Optional[List[str]] = Field( None, description='Command-line parameters passed to Python wheel task. Leave it empty if `named_parameters` is not null.', ) +class JobsTasksRunJobTaskPipelineParams(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + full_refresh: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( + None, description='If true, triggers a full refresh on the delta live table.' + ) + + class JobsTasksRunJobTask(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - job_id: float = Field(..., description='ID of the job to trigger.') - job_parameters: Optional[Any] = None + dbt_commands: Optional[List[str]] = Field( + None, + description='An array of commands to execute for jobs with the dbt task, for example `"dbt_commands": ["dbt deps", "dbt seed", "dbt deps", "dbt seed", "dbt run"]`', + ) + jar_params: Optional[List[str]] = Field( + None, + description='A list of parameters for jobs with Spark JAR tasks, for example `"jar_params": ["john doe", "35"]`.\nThe parameters are used to invoke the main function of the main class specified in the Spark JAR task.\nIf not specified upon `run-now`, it defaults to an empty list.\njar_params cannot be specified in conjunction with notebook_params.\nThe JSON representation of this field (for example `{"jar_params":["john doe","35"]}`) cannot exceed 10,000 bytes.\n\nUse [Task parameter variables](/jobs.html\\"#parameter-variables\\") to set parameters containing information about job runs.', + ) + job_id: Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] = Field(..., description='ID of the job to trigger.') + job_parameters: Optional[Dict[str, str]] = None + notebook_params: Optional[Dict[str, str]] = None + pipeline_params: Optional[JobsTasksRunJobTaskPipelineParams] = None + python_named_params: Optional[Dict[str, str]] = None + python_params: Optional[List[str]] = Field( + None, + description='A list of parameters for jobs with Python tasks, for example `"python_params": ["john doe", "35"]`.\nThe parameters are passed to Python file as command-line parameters. If specified upon `run-now`, it would overwrite\nthe parameters specified in job setting. The JSON representation of this field (for example `{"python_params":["john doe","35"]}`)\ncannot exceed 10,000 bytes.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.\n\nImportant\n\nThese parameters accept only Latin characters (ASCII character set). 
Using non-ASCII characters returns an error.\nExamples of invalid, non-ASCII characters are Chinese, Japanese kanjis, and emojis.', + ) + spark_submit_params: Optional[List[str]] = Field( + None, + description='A list of parameters for jobs with spark submit task, for example `"spark_submit_params": ["--class", "org.apache.spark.examples.SparkPi"]`.\nThe parameters are passed to spark-submit script as command-line parameters. If specified upon `run-now`, it would overwrite the\nparameters specified in job setting. The JSON representation of this field (for example `{"python_params":["john doe","35"]}`)\ncannot exceed 10,000 bytes.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs\n\nImportant\n\nThese parameters accept only Latin characters (ASCII character set). Using non-ASCII characters returns an error.\nExamples of invalid, non-ASCII characters are Chinese, Japanese kanjis, and emojis.', + ) + sql_params: Optional[Dict[str, str]] = None class JobsTasksSparkJarTask(BaseModel): class Config: extra = "forbid" + protected_namespaces = () jar_uri: Optional[str] = Field( None, - description='Deprecated since 04/2016. Provide a `jar` through the `libraries` field instead. For an example, see :method:jobs/create.\n', + description='Deprecated since 04/2016. Provide a `jar` through the `libraries` field instead. For an example, see :method:jobs/create.', ) main_class_name: Optional[str] = Field( None, @@ -1288,17 +1940,18 @@ class Config: ) parameters: Optional[List[str]] = Field( None, - description='Parameters passed to the main method.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.\n', + description='Parameters passed to the main method.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.', ) class JobsTasksSparkPythonTask(BaseModel): class Config: extra = "forbid" + protected_namespaces = () parameters: Optional[List[str]] = Field( None, - description='Command line parameters passed to the Python file.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.\n', + description='Command line parameters passed to the Python file.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.', ) python_file: str = Field( ..., @@ -1306,23 +1959,25 @@ class Config: ) source: Optional[str] = Field( None, - description='Optional location type of the Python file. When set to `WORKSPACE` or not specified, the file will be retrieved\nfrom the local workspace or cloud location (if the `python_file` has a URI format). When set to `GIT`,\nthe Python file will be retrieved from a Git repository defined in `git_source`.\n\n* `WORKSPACE`: The Python file is located in a workspace or at a cloud filesystem URI.\n* `GIT`: The Python file is located in a remote Git repository.\n', + description='Optional location type of the Python file. When set to `WORKSPACE` or not specified, the file will be retrieved from the local\nDatabricks workspace or cloud location (if the `python_file` has a URI format). 
When set to `GIT`,\nthe Python file will be retrieved from a Git repository defined in `git_source`.\n\n* `WORKSPACE`: The Python file is located in a Databricks workspace or at a cloud filesystem URI.\n* `GIT`: The Python file is located in a remote Git repository.', ) class JobsTasksSparkSubmitTask(BaseModel): class Config: extra = "forbid" + protected_namespaces = () parameters: Optional[List[str]] = Field( None, - description='Command-line parameters passed to spark submit.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.\n', + description='Command-line parameters passed to spark submit.\n\nUse [Task parameter variables](https://docs.databricks.com/jobs.html#parameter-variables) to set parameters containing information about job runs.', ) class JobsTasksSqlTaskAlertSubscriptions(BaseModel): class Config: extra = "forbid" + protected_namespaces = () destination_id: Optional[str] = Field( None, @@ -1337,9 +1992,17 @@ class Config: class JobsTasksSqlTaskAlert(BaseModel): class Config: extra = "forbid" + protected_namespaces = () alert_id: str = Field(..., description='The canonical identifier of the SQL alert.') - pause_subscriptions: Optional[bool] = Field( + pause_subscriptions: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='If true, the alert notifications are not sent to subscribers.', ) @@ -1351,6 +2014,7 @@ class Config: class JobsTasksSqlTaskDashboardSubscriptions(BaseModel): class Config: extra = "forbid" + protected_namespaces = () destination_id: Optional[str] = Field( None, @@ -1365,6 +2029,7 @@ class Config: class JobsTasksSqlTaskDashboard(BaseModel): class Config: extra = "forbid" + protected_namespaces = () custom_subject: Optional[str] = Field( None, description='Subject of the email sent to subscribers of this task.' @@ -1372,7 +2037,14 @@ class Config: dashboard_id: str = Field( ..., description='The canonical identifier of the SQL dashboard.' ) - pause_subscriptions: Optional[bool] = Field( + pause_subscriptions: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='If true, the dashboard snapshot is not taken, and emails are not sent to subscribers.', ) @@ -1384,15 +2056,22 @@ class Config: class JobsTasksSqlTaskFile(BaseModel): class Config: extra = "forbid" + protected_namespaces = () path: str = Field( - ..., description='Relative path of the SQL file in the remote Git repository.' + ..., + description='Path of the SQL file. Must be relative if the source is a remote Git repository and absolute for workspace paths.', + ) + source: Optional[str] = Field( + None, + description='Optional location type of the SQL file. When set to `WORKSPACE`, the SQL file will be retrieved\nfrom the local Databricks workspace. When set to `GIT`, the SQL file will be retrieved from a Git repository\ndefined in `git_source`. 
If the value is empty, the task will use `GIT` if `git_source` is defined and `WORKSPACE` otherwise.\n\n* `WORKSPACE`: SQL file is located in Databricks workspace.\n* `GIT`: SQL file is located in cloud Git provider.', ) class JobsTasksSqlTaskQuery(BaseModel): class Config: extra = "forbid" + protected_namespaces = () query_id: str = Field(..., description='The canonical identifier of the SQL query.') @@ -1400,6 +2079,7 @@ class Config: class JobsTasksSqlTask(BaseModel): class Config: extra = "forbid" + protected_namespaces = () alert: Optional[JobsTasksSqlTaskAlert] = None dashboard: Optional[JobsTasksSqlTaskDashboard] = None @@ -1415,34 +2095,39 @@ class Config: class JobsTasksWebhookNotificationsOnDurationWarningThresholdExceeded(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - id: Optional[str] = None + id: str class JobsTasksWebhookNotificationsOnFailure(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - id: Optional[str] = None + id: str class JobsTasksWebhookNotificationsOnStart(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - id: Optional[str] = None + id: str class JobsTasksWebhookNotificationsOnSuccess(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - id: Optional[str] = None + id: str class JobsTasksWebhookNotifications(BaseModel): class Config: extra = "forbid" + protected_namespaces = () on_duration_warning_threshold_exceeded: Optional[ List[JobsTasksWebhookNotificationsOnDurationWarningThresholdExceeded] @@ -1467,25 +2152,35 @@ class Config: class JobsTasks(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - compute_key: Optional[str] = Field( - None, - description='The key of the compute requirement, specified in `job.settings.compute`, to use for execution of this task.', - ) condition_task: Optional[JobsTasksConditionTask] = None dbt_task: Optional[JobsTasksDbtTask] = None depends_on: Optional[List[JobsTasksDependsOn]] = Field( None, - description='An optional array of objects specifying the dependency graph of the task. All tasks specified in this field must complete before executing this task. The task will run only if the `run_if` condition is true.\nThe key is `task_key`, and the value is the name assigned to the dependent task.\n', + description='An optional array of objects specifying the dependency graph of the task. All tasks specified in this field must complete before executing this task. The task will run only if the `run_if` condition is true.\nThe key is `task_key`, and the value is the name assigned to the dependent task.', ) description: Optional[str] = Field( None, description='An optional description for this task.' ) + disable_auto_optimization: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field(None, description='An option to disable auto optimization in serverless') email_notifications: Optional[JobsTasksEmailNotifications] = None + environment_key: Optional[str] = Field( + None, + description='The key that references an environment spec in a job. This field is required for Python script, Python wheel and dbt tasks when using serverless compute.', + ) existing_cluster_id: Optional[str] = Field( None, - description='If existing_cluster_id, the ID of an existing cluster that is used for all runs of this task. When running tasks on an existing cluster, you may need to manually restart the cluster if it stops responding. 
We suggest running jobs on new clusters for greater reliability.', + description='If existing_cluster_id, the ID of an existing cluster that is used for all runs.\nWhen running jobs or tasks on an existing cluster, you may need to manually restart\nthe cluster if it stops responding. We suggest running jobs and tasks on new clusters for\ngreater reliability', ) + for_each_task: Optional[JobsTasksForEachTask] = None health: Optional[JobsTasksHealth] = None job_cluster_key: Optional[str] = Field( None, @@ -1493,13 +2188,27 @@ class Config: ) libraries: Optional[List[JobsTasksLibraries]] = Field( None, - description='An optional list of libraries to be installed on the cluster that executes the task. The default value is an empty list.', + description='An optional list of libraries to be installed on the cluster.\nThe default value is an empty list.', ) - max_retries: Optional[float] = Field( + max_retries: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='An optional maximum number of times to retry an unsuccessful run. A run is considered to be unsuccessful if it completes with the `FAILED` result_state or `INTERNAL_ERROR` `life_cycle_state`. The value `-1` means to retry indefinitely and the value `0` means to never retry.', ) - min_retry_interval_millis: Optional[float] = Field( + min_retry_interval_millis: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='An optional minimal interval in milliseconds between the start of the failed run and the subsequent retry run. The default behavior is that unsuccessful runs are immediately retried.', ) @@ -1508,13 +2217,20 @@ class Config: notification_settings: Optional[JobsTasksNotificationSettings] = None pipeline_task: Optional[JobsTasksPipelineTask] = None python_wheel_task: Optional[JobsTasksPythonWheelTask] = None - retry_on_timeout: Optional[bool] = Field( + retry_on_timeout: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, - description='An optional policy to specify whether to retry a task when it times out.', + description='An optional policy to specify whether to retry a job when it times out. 
The default behavior\nis to not retry on timeout.', ) run_if: Optional[str] = Field( None, - description='An optional value specifying the condition determining whether the task is run once its dependencies have been completed.\n\n* `ALL_SUCCESS`: All dependencies have executed and succeeded\n* `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded\n* `NONE_FAILED`: None of the dependencies have failed and at least one was executed\n* `ALL_DONE`: All dependencies have been completed\n* `AT_LEAST_ONE_FAILED`: At least one dependency failed\n* `ALL_FAILED`: ALl dependencies have failed\n', + description='An optional value specifying the condition determining whether the task is run once its dependencies have been completed.\n\n* `ALL_SUCCESS`: All dependencies have executed and succeeded\n* `AT_LEAST_ONE_SUCCESS`: At least one dependency has succeeded\n* `NONE_FAILED`: None of the dependencies have failed and at least one was executed\n* `ALL_DONE`: All dependencies have been completed\n* `AT_LEAST_ONE_FAILED`: At least one dependency failed\n* `ALL_FAILED`: ALl dependencies have failed', ) run_job_task: Optional[JobsTasksRunJobTask] = None spark_jar_task: Optional[JobsTasksSparkJarTask] = None @@ -1525,7 +2241,14 @@ class Config: ..., description='A unique name for the task. This field is used to refer to this task from other tasks.\nThis field is required and must be unique within its parent job.\nOn Update or Reset, this field is used to reference the tasks to be updated or reset.', ) - timeout_seconds: Optional[int] = Field( + timeout_seconds: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='An optional timeout applied to each run of this job task. A value of `0` means no timeout.', ) @@ -1535,62 +2258,157 @@ class Config: class JobsTriggerFileArrival(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - min_time_between_triggers_seconds: Optional[float] = Field( + min_time_between_triggers_seconds: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, - description='If set, the trigger starts a run only after the specified amount of time passed since\nthe last time the trigger fired. The minimum allowed value is 60 seconds\n', + description='If set, the trigger starts a run only after the specified amount of time passed since\nthe last time the trigger fired. The minimum allowed value is 60 seconds', ) - url: Optional[str] = Field( - None, + url: str = Field( + ..., description='URL to be monitored for file arrivals. The path must point to the root or a subpath of the external location.', ) - wait_after_last_change_seconds: Optional[float] = Field( + wait_after_last_change_seconds: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( + None, + description='If set, the trigger starts a run only after no file activity has occurred for the specified amount of time.\nThis makes it possible to wait for a batch of incoming files to arrive before triggering a run. The\nminimum allowed value is 60 seconds.', + ) + + +class JobsTriggerTable(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + condition: Optional[str] = Field( + None, description='The table(s) condition based on which to trigger a job run.' 
+ ) + min_time_between_triggers_seconds: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( + None, + description='If set, the trigger starts a run only after the specified amount of time has passed since\nthe last time the trigger fired. The minimum allowed value is 60 seconds.', + ) + table_names: Optional[List[str]] = Field( + None, + description='A list of Delta tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`.', + ) + wait_after_last_change_seconds: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( + None, + description='If set, the trigger starts a run only after no table updates have occurred for the specified time\nand can be used to wait for a series of table updates before triggering a run. The\nminimum allowed value is 60 seconds.', + ) + + +class JobsTriggerTableUpdate(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + condition: Optional[str] = Field( + None, description='The table(s) condition based on which to trigger a job run.' + ) + min_time_between_triggers_seconds: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( + None, + description='If set, the trigger starts a run only after the specified amount of time has passed since\nthe last time the trigger fired. The minimum allowed value is 60 seconds.', + ) + table_names: Optional[List[str]] = Field( + None, + description='A list of Delta tables to monitor for changes. The table name must be in the format `catalog_name.schema_name.table_name`.', + ) + wait_after_last_change_seconds: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, - description='If set, the trigger starts a run only after no file activity has occurred for the specified amount of time.\nThis makes it possible to wait for a batch of incoming files to arrive before triggering a run. The\nminimum allowed value is 60 seconds.\n', + description='If set, the trigger starts a run only after no table updates have occurred for the specified time\nand can be used to wait for a series of table updates before triggering a run. The\nminimum allowed value is 60 seconds.', ) class JobsTrigger(BaseModel): class Config: extra = "forbid" + protected_namespaces = () file_arrival: Optional[JobsTriggerFileArrival] = None pause_status: Optional[str] = Field( - None, description='Indicate whether this schedule is paused or not.' + None, description='Whether this trigger is paused or not.' 
) + table: Optional[JobsTriggerTable] = None + table_update: Optional[JobsTriggerTableUpdate] = None class JobsWebhookNotificationsOnDurationWarningThresholdExceeded(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - id: Optional[str] = None + id: str class JobsWebhookNotificationsOnFailure(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - id: Optional[str] = None + id: str class JobsWebhookNotificationsOnStart(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - id: Optional[str] = None + id: str class JobsWebhookNotificationsOnSuccess(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - id: Optional[str] = None + id: str class JobsWebhookNotifications(BaseModel): class Config: extra = "forbid" + protected_namespaces = () on_duration_warning_threshold_exceeded: Optional[ List[JobsWebhookNotificationsOnDurationWarningThresholdExceeded] @@ -1615,11 +2433,8 @@ class Config: class Jobs(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - compute: Optional[List[JobsCompute]] = Field( - None, - description='A list of compute requirements that can be referenced by tasks of this job.', - ) continuous: Optional[JobsContinuous] = None deployment: Optional[JobsDeployment] = None description: Optional[str] = Field( @@ -1628,9 +2443,13 @@ class Config: ) edit_mode: Optional[str] = Field( None, - description='Edit mode of the job.\n\n* `UI_LOCKED`: The job is in a locked UI state and cannot be modified.\n* `EDITABLE`: The job is in an editable state and can be modified.\n', + description='Edit mode of the job.\n\n* `UI_LOCKED`: The job is in a locked UI state and cannot be modified.\n* `EDITABLE`: The job is in an editable state and can be modified.', ) email_notifications: Optional[JobsEmailNotifications] = None + environments: Optional[List[Jobs]] = Field( + None, + description='A list of task execution environment specifications that can be referenced by tasks of this job.', + ) format: Optional[str] = Field( None, description='Used to tell what is the format of the job. This field is ignored in Create/Update/Reset calls. When using the Jobs API 2.1 this value is always set to `"MULTI_TASK"`.', @@ -1641,9 +2460,16 @@ class Config: None, description='A list of job cluster specifications that can be shared and reused by tasks of this job. Libraries cannot be declared in a shared job cluster. You must declare dependent libraries in task settings.', ) - max_concurrent_runs: Optional[float] = Field( + max_concurrent_runs: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, - description='An optional maximum allowed number of concurrent runs of the job.\n\nSet this value if you want to be able to execute multiple runs of the same job concurrently. This is useful for example if you trigger your job on a frequent schedule and want to allow consecutive runs to overlap with each other, or if you want to trigger multiple runs which differ by their input parameters.\n\nThis setting affects only new runs. For example, suppose the job’s concurrency is 4 and there are 4 concurrent active runs. Then setting the concurrency to 3 won’t kill any of the active runs. However, from then on, new runs are skipped unless there are fewer than 3 active runs.\n\nThis value cannot exceed 1000. 
Setting this value to `0` causes all new runs to be skipped.', + description='An optional maximum allowed number of concurrent runs of the job.\nSet this value if you want to be able to execute multiple runs of the same job concurrently.\nThis is useful for example if you trigger your job on a frequent schedule and want to allow consecutive runs to overlap with each other, or if you want to trigger multiple runs which differ by their input parameters.\nThis setting affects only new runs. For example, suppose the job’s concurrency is 4 and there are 4 concurrent active runs. Then setting the concurrency to 3 won’t kill any of the active runs.\nHowever, from then on, new runs are skipped unless there are fewer than 3 active runs.\nThis value cannot exceed 1000. Setting this value to `0` causes all new runs to be skipped.', ) name: Optional[str] = Field( None, @@ -1661,7 +2487,14 @@ class Config: tasks: Optional[List[JobsTasks]] = Field( None, description='A list of task specifications to be executed by this job.' ) - timeout_seconds: Optional[int] = Field( + timeout_seconds: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='An optional timeout applied to each run of this job. A value of `0` means no timeout.', ) @@ -1669,9 +2502,266 @@ class Config: webhook_notifications: Optional[JobsWebhookNotifications] = None +class ModelServingEndpointsConfigAutoCaptureConfig(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + catalog_name: Optional[str] = Field( + None, + description='The name of the catalog in Unity Catalog. NOTE: On update, you cannot change the catalog name if it was already set.', + ) + enabled: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( + None, + description='If inference tables are enabled or not. NOTE: If you have already disabled payload logging once, you cannot enable again.', + ) + schema_name: Optional[str] = Field( + None, + description='The name of the schema in Unity Catalog. NOTE: On update, you cannot change the schema name if it was already set.', + ) + table_name_prefix: Optional[str] = Field( + None, + description='The prefix of the table in Unity Catalog. NOTE: On update, you cannot change the prefix name if it was already set.', + ) + + +class ModelServingEndpointsConfigServedEntitiesExternalModelAi21labsConfig(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + ai21labs_api_key: str = Field( + ..., description='The Databricks secret key reference for an AI21Labs API key.' + ) + + +class ModelServingEndpointsConfigServedEntitiesExternalModelAmazonBedrockConfig( + BaseModel +): + class Config: + extra = "forbid" + protected_namespaces = () + + aws_access_key_id: str = Field( + ..., + description='The Databricks secret key reference for an AWS Access Key ID with permissions to interact with Bedrock services.', + ) + aws_region: str = Field( + ..., description='The AWS region to use. Bedrock has to be enabled there.' + ) + aws_secret_access_key: str = Field( + ..., + description='The Databricks secret key reference for an AWS Secret Access Key paired with the access key ID, with permissions to interact with Bedrock services.', + ) + bedrock_provider: str = Field( + ..., + description='The underlying provider in Amazon Bedrock. 
Supported values (case insensitive) include: Anthropic, Cohere, AI21Labs, Amazon.', + ) + + +class ModelServingEndpointsConfigServedEntitiesExternalModelAnthropicConfig(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + anthropic_api_key: str = Field( + ..., description='The Databricks secret key reference for an Anthropic API key.' + ) + + +class ModelServingEndpointsConfigServedEntitiesExternalModelCohereConfig(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + cohere_api_key: str = Field( + ..., description='The Databricks secret key reference for a Cohere API key.' + ) + + +class ModelServingEndpointsConfigServedEntitiesExternalModelDatabricksModelServingConfig( + BaseModel +): + class Config: + extra = "forbid" + protected_namespaces = () + + databricks_api_token: str = Field( + ..., + description='The Databricks secret key reference for a Databricks API token that corresponds to a user or service\nprincipal with Can Query access to the model serving endpoint pointed to by this external model.\n', + ) + databricks_workspace_url: str = Field( + ..., + description='The URL of the Databricks workspace containing the model serving endpoint pointed to by this external model.\n', + ) + + +class ModelServingEndpointsConfigServedEntitiesExternalModelOpenaiConfig(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + microsoft_entra_client_id: Optional[str] = Field( + None, + description='This field is only required for Azure AD OpenAI and is the Microsoft Entra Client ID.\n', + ) + microsoft_entra_client_secret: Optional[str] = Field( + None, + description='The Databricks secret key reference for the Microsoft Entra Client Secret that is\nonly required for Azure AD OpenAI.\n', + ) + microsoft_entra_tenant_id: Optional[str] = Field( + None, + description='This field is only required for Azure AD OpenAI and is the Microsoft Entra Tenant ID.\n', + ) + openai_api_base: Optional[str] = Field( + None, + description='This is the base URL for the OpenAI API (default: "https://api.openai.com/v1").\nFor Azure OpenAI, this field is required, and is the base URL for the Azure OpenAI API service\nprovided by Azure.\n', + ) + openai_api_key: Optional[str] = Field( + None, + description='The Databricks secret key reference for an OpenAI or Azure OpenAI API key.', + ) + openai_api_type: Optional[str] = Field( + None, + description='This is an optional field to specify the type of OpenAI API to use.\nFor Azure OpenAI, this field is required, and adjust this parameter to represent the preferred security\naccess validation protocol. For access token validation, use azure. 
For authentication using Azure Active\nDirectory (Azure AD) use, azuread.\n', + ) + openai_api_version: Optional[str] = Field( + None, + description='This is an optional field to specify the OpenAI API version.\nFor Azure OpenAI, this field is required, and is the version of the Azure OpenAI service to\nutilize, specified by a date.\n', + ) + openai_deployment_name: Optional[str] = Field( + None, + description='This field is only required for Azure OpenAI and is the name of the deployment resource for the\nAzure OpenAI service.\n', + ) + openai_organization: Optional[str] = Field( + None, + description='This is an optional field to specify the organization in OpenAI or Azure OpenAI.\n', + ) + + +class ModelServingEndpointsConfigServedEntitiesExternalModelPalmConfig(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + palm_api_key: str = Field( + ..., description='The Databricks secret key reference for a PaLM API key.' + ) + + +class ModelServingEndpointsConfigServedEntitiesExternalModel(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + ai21labs_config: Optional[ + ModelServingEndpointsConfigServedEntitiesExternalModelAi21labsConfig + ] = None + amazon_bedrock_config: Optional[ + ModelServingEndpointsConfigServedEntitiesExternalModelAmazonBedrockConfig + ] = None + anthropic_config: Optional[ + ModelServingEndpointsConfigServedEntitiesExternalModelAnthropicConfig + ] = None + cohere_config: Optional[ + ModelServingEndpointsConfigServedEntitiesExternalModelCohereConfig + ] = None + databricks_model_serving_config: Optional[ + ModelServingEndpointsConfigServedEntitiesExternalModelDatabricksModelServingConfig + ] = None + name: str = Field(..., description='The name of the external model.') + openai_config: Optional[ + ModelServingEndpointsConfigServedEntitiesExternalModelOpenaiConfig + ] = None + palm_config: Optional[ + ModelServingEndpointsConfigServedEntitiesExternalModelPalmConfig + ] = None + provider: str = Field( + ..., + description='The name of the provider for the external model. Currently, the supported providers are \'ai21labs\', \'anthropic\',\n\'amazon-bedrock\', \'cohere\', \'databricks-model-serving\', \'openai\', and \'palm\'.",\n', + ) + task: str = Field(..., description='The task type of the external model.') + + +class ModelServingEndpointsConfigServedEntities(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + entity_name: Optional[str] = Field( + None, + description='The name of the entity to be served. The entity may be a model in the Databricks Model Registry, a model in the Unity Catalog (UC),\nor a function of type FEATURE_SPEC in the UC. 
If it is a UC object, the full name of the object should be given in the form of\n__catalog_name__.__schema_name__.__model_name__.\n', + ) + entity_version: Optional[str] = Field( + None, + description='The version of the model in Databricks Model Registry to be served or empty if the entity is a FEATURE_SPEC.', + ) + environment_vars: Optional[Dict[str, str]] = None + external_model: Optional[ + ModelServingEndpointsConfigServedEntitiesExternalModel + ] = None + instance_profile_arn: Optional[str] = Field( + None, + description='ARN of the instance profile that the served entity uses to access AWS resources.', + ) + max_provisioned_throughput: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( + None, + description='The maximum tokens per second that the endpoint can scale up to.', + ) + min_provisioned_throughput: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( + None, + description='The minimum tokens per second that the endpoint can scale down to.', + ) + name: Optional[str] = Field( + None, + description="The name of a served entity. It must be unique across an endpoint. A served entity name can consist of alphanumeric characters, dashes, and underscores.\nIf not specified for an external model, this field defaults to external_model.name, with '.' and ':' replaced with '-', and if not specified for other\nentities, it defaults to -.\n", + ) + scale_to_zero_enabled: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( + None, + description='Whether the compute resources for the served entity should scale down to zero.', + ) + workload_size: Optional[str] = Field( + None, + description='The workload size of the served entity. The workload size corresponds to a range of provisioned concurrency that the compute autoscales between.\nA single unit of provisioned concurrency can process one request at a time.\nValid workload sizes are "Small" (4 - 4 provisioned concurrency), "Medium" (8 - 16 provisioned concurrency), and "Large" (16 - 64 provisioned concurrency).\nIf scale-to-zero is enabled, the lower bound of the provisioned concurrency for each workload size is 0.\n', + ) + workload_type: Optional[str] = Field( + None, + description='The workload type of the served entity. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is\n"CPU". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others.\nSee the available [GPU types](https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types).\n', + ) + + class ModelServingEndpointsConfigServedModels(BaseModel): class Config: extra = "forbid" + protected_namespaces = () environment_vars: Optional[Dict[str, str]] = None instance_profile_arn: Optional[str] = Field( @@ -1690,7 +2780,12 @@ class Config: None, description='The name of a served model. It must be unique across an endpoint. 
If not specified, this field will default to -.\nA served model name can consist of alphanumeric characters, dashes, and underscores.\n', ) - scale_to_zero_enabled: bool = Field( + scale_to_zero_enabled: Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] = Field( ..., description='Whether the compute resources for the served model should scale down to zero.', ) @@ -1700,19 +2795,25 @@ class Config: ) workload_type: Optional[str] = Field( None, - description='The workload type of the served model. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is\n"CPU". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others. See documentation for all\noptions.\n', + description='The workload type of the served model. The workload type selects which type of compute to use in the endpoint. The default value for this parameter is\n"CPU". For deep learning workloads, GPU acceleration is available by selecting workload types like GPU_SMALL and others.\nSee the available [GPU types](https://docs.databricks.com/machine-learning/model-serving/create-manage-serving-endpoints.html#gpu-workload-types).\n', ) class ModelServingEndpointsConfigTrafficConfigRoutes(BaseModel): class Config: extra = "forbid" + protected_namespaces = () served_model_name: str = Field( ..., description='The name of the served model this route configures traffic for.', ) - traffic_percentage: float = Field( + traffic_percentage: Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] = Field( ..., description='The percentage of endpoint traffic to send to this route. It must be an integer between 0 and 100 inclusive.', ) @@ -1721,19 +2822,27 @@ class Config: class ModelServingEndpointsConfigTrafficConfig(BaseModel): class Config: extra = "forbid" + protected_namespaces = () routes: Optional[List[ModelServingEndpointsConfigTrafficConfigRoutes]] = Field( - None, description='The list of routes that define traffic to each served model.' + None, + description='The list of routes that define traffic to each served entity.', ) class ModelServingEndpointsConfig(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - served_models: List[ModelServingEndpointsConfigServedModels] = Field( - ..., - description='A list of served models for the endpoint to serve. A serving endpoint can have up to 10 served models.', + auto_capture_config: Optional[ModelServingEndpointsConfigAutoCaptureConfig] = None + served_entities: Optional[List[ModelServingEndpointsConfigServedEntities]] = Field( + None, + description='A list of served entities for the endpoint to serve. A serving endpoint can have up to 15 served entities.', + ) + served_models: Optional[List[ModelServingEndpointsConfigServedModels]] = Field( + None, + description='(Deprecated, use served_entities instead) A list of served models for the endpoint to serve. 
A serving endpoint can have up to 15 served models.', ) traffic_config: Optional[ModelServingEndpointsConfigTrafficConfig] = None @@ -1741,6 +2850,7 @@ class Config: class ModelServingEndpointsPermissions(BaseModel): class Config: extra = "forbid" + protected_namespaces = () group_name: Optional[str] = None level: str @@ -1748,9 +2858,34 @@ class Config: user_name: Optional[str] = None +class ModelServingEndpointsRateLimits(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + calls: Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] = Field( + ..., + description='Used to specify how many calls are allowed for a key within the renewal_period.', + ) + key: Optional[str] = Field( + None, + description="Key field for a serving endpoint rate limit. Currently, only 'user' and 'endpoint' are supported, with 'endpoint' being the default if not specified.", + ) + renewal_period: str = Field( + ..., + description="Renewal period field for a serving endpoint rate limit. Currently, only 'minute' is supported.", + ) + + class ModelServingEndpointsTags(BaseModel): class Config: extra = "forbid" + protected_namespaces = () key: str = Field(..., description='Key field for a serving endpoint tag.') value: Optional[str] = Field( @@ -1761,6 +2896,7 @@ class Config: class ModelServingEndpoints(BaseModel): class Config: extra = "forbid" + protected_namespaces = () config: ModelServingEndpointsConfig name: str = Field( @@ -1768,6 +2904,18 @@ class Config: description='The name of the serving endpoint. This field is required and must be unique across a Databricks workspace.\nAn endpoint name can consist of alphanumeric characters, dashes, and underscores.\n', ) permissions: Optional[List[ModelServingEndpointsPermissions]] = None + rate_limits: Optional[List[ModelServingEndpointsRateLimits]] = Field( + None, + description='Rate limits to be applied to the serving endpoint. NOTE: only external and foundation model endpoints are supported as of now.', + ) + route_optimized: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field(None, description='Enable route optimization for the serving endpoint.') tags: Optional[List[ModelServingEndpointsTags]] = Field( None, description='Tags to be attached to the serving endpoint and automatically propagated to billing logs.', @@ -1777,6 +2925,7 @@ class Config: class ModelsLatestVersionsTags(BaseModel): class Config: extra = "forbid" + protected_namespaces = () key: Optional[str] = Field(None, description='The tag key.') value: Optional[str] = Field(None, description='The tag value.') @@ -1785,8 +2934,16 @@ class Config: class ModelsLatestVersions(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - creation_timestamp: Optional[float] = Field( + creation_timestamp: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='Timestamp recorded when this `model_version` was created.' ) current_stage: Optional[str] = Field( @@ -1795,7 +2952,14 @@ class Config: description: Optional[str] = Field( None, description='Description of this `model_version`.' 
) - last_updated_timestamp: Optional[float] = Field( + last_updated_timestamp: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='Timestamp recorded when metadata for this `model_version` was last updated.', ) @@ -1828,6 +2992,7 @@ class Config: class ModelsPermissions(BaseModel): class Config: extra = "forbid" + protected_namespaces = () group_name: Optional[str] = None level: str @@ -1838,6 +3003,7 @@ class Config: class ModelsTags(BaseModel): class Config: extra = "forbid" + protected_namespaces = () key: Optional[str] = Field(None, description='The tag key.') value: Optional[str] = Field(None, description='The tag value.') @@ -1846,14 +3012,29 @@ class Config: class Models(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - creation_timestamp: Optional[float] = Field( + creation_timestamp: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='Timestamp recorded when this `registered_model` was created.' ) description: Optional[str] = Field( None, description='Description of this `registered_model`.' ) - last_updated_timestamp: Optional[float] = Field( + last_updated_timestamp: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='Timestamp recorded when metadata for this `registered_model` was last updated.', ) @@ -1875,36 +3056,91 @@ class Config: class PipelinesClustersAutoscale(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - max_workers: float = Field( + max_workers: Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] = Field( ..., - description='The maximum number of workers to which the cluster can scale up when overloaded.\nNote that `max_workers` must be strictly greater than `min_workers`.', + description='The maximum number of workers to which the cluster can scale up when overloaded. `max_workers` must be strictly greater than `min_workers`.', ) - min_workers: float = Field( + min_workers: Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] = Field( ..., - description='The minimum number of workers to which the cluster can scale down when underutilized.\nIt is also the initial number of workers the cluster will have after creation.', + description='The minimum number of workers the cluster can scale down to when underutilized.\nIt is also the initial number of workers the cluster will have after creation.', ) - + mode: Optional[str] = Field( + None, + description='Databricks Enhanced Autoscaling optimizes cluster utilization by automatically\nallocating cluster resources based on workload volume, with minimal impact to\nthe data processing latency of your pipelines. Enhanced Autoscaling is available\nfor `updates` clusters only. 
The legacy autoscaling feature is used for `maintenance`\nclusters.\n', + ) + class PipelinesClustersAwsAttributes(BaseModel): class Config: extra = "forbid" + protected_namespaces = () availability: Optional[str] = None - ebs_volume_count: Optional[float] = Field( + ebs_volume_count: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='The number of volumes launched for each instance. Users can choose up to 10 volumes.\nThis feature is only enabled for supported node types. Legacy node types cannot specify\ncustom EBS volumes.\nFor node types with no instance store, at least one EBS volume needs to be specified;\notherwise, cluster creation will fail.\n\nThese EBS volumes will be mounted at `/ebs0`, `/ebs1`, and etc.\nInstance store volumes will be mounted at `/local_disk0`, `/local_disk1`, and etc.\n\nIf EBS volumes are attached, Databricks will configure Spark to use only the EBS volumes for\nscratch storage because heterogenously sized scratch devices can lead to inefficient disk\nutilization. If no EBS volumes are attached, Databricks will configure Spark to use instance\nstore volumes.\n\nPlease note that if EBS volumes are specified, then the Spark configuration `spark.local.dir`\nwill be overridden.', ) - ebs_volume_iops: Optional[float] = Field(None, description='') - ebs_volume_size: Optional[float] = Field( + ebs_volume_iops: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( + None, + description='If using gp3 volumes, what IOPS to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used.', + ) + ebs_volume_size: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='The size of each EBS volume (in GiB) launched for each instance. For general purpose\nSSD, this value must be within the range 100 - 4096. For throughput optimized HDD,\nthis value must be within the range 500 - 4096.', ) - ebs_volume_throughput: Optional[float] = Field( - None, description='' + ebs_volume_throughput: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( + None, + description='If using gp3 volumes, what throughput to use for the disk. If this is not set, the maximum performance of a gp2 volume with the same volume size will be used.', ) ebs_volume_type: Optional[str] = None - first_on_demand: Optional[float] = Field( + first_on_demand: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nIf this value is greater than 0, the cluster driver node in particular will be placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. 
Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster.', ) @@ -1912,7 +3148,14 @@ class Config: None, description='Nodes for this cluster will only be placed on AWS instances with this instance profile. If\nommitted, nodes will be placed on instances without an IAM instance profile. The instance\nprofile must have previously been added to the Databricks environment by an account\nadministrator.\n\nThis feature may only be available to certain customer plans.\n\nIf this field is ommitted, we will pull in the default from the conf if it exists.', ) - spot_bid_price_percent: Optional[float] = Field( + spot_bid_price_percent: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description="The bid price for AWS spot instances, as a percentage of the corresponding instance type's\non-demand price.\nFor example, if this field is set to 50, and the cluster needs a new `r3.xlarge` spot\ninstance, then the bid price is half of the price of\non-demand `r3.xlarge` instances. Similarly, if this field is set to 200, the bid price is twice\nthe price of on-demand `r3.xlarge` instances. If not specified, the default value is 100.\nWhen spot instances are requested for this cluster, only spot instances whose bid price\npercentage matches this field will be considered.\nNote that, for safety, we enforce this field to be no more than 10000.\n\nThe default value and documentation here should be kept consistent with\nCommonConf.defaultSpotBidPricePercent and CommonConf.maxSpotBidPricePercent.", ) @@ -1925,6 +3168,7 @@ class Config: class PipelinesClustersAzureAttributesLogAnalyticsInfo(BaseModel): class Config: extra = "forbid" + protected_namespaces = () log_analytics_primary_key: Optional[str] = Field( None, description='' @@ -1937,16 +3181,31 @@ class Config: class PipelinesClustersAzureAttributes(BaseModel): class Config: extra = "forbid" + protected_namespaces = () availability: Optional[str] = None - first_on_demand: Optional[float] = Field( + first_on_demand: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='The first `first_on_demand` nodes of the cluster will be placed on on-demand instances.\nThis value should be greater than 0, to make sure the cluster driver node is placed on an\non-demand instance. If this value is greater than or equal to the current cluster size, all\nnodes will be placed on on-demand instances. If this value is less than the current cluster\nsize, `first_on_demand` nodes will be placed on on-demand instances and the remainder will\nbe placed on `availability` instances. Note that this value does not affect\ncluster size and cannot currently be mutated over the lifetime of a cluster.', ) log_analytics_info: Optional[ PipelinesClustersAzureAttributesLogAnalyticsInfo ] = None - spot_bid_max_price: Optional[float] = Field( + spot_bid_max_price: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='The max bid price to be used for Azure spot instances.\nThe Max price for the bid cannot be higher than the on-demand price of the instance.\nIf not specified, the default value is -1, which specifies that the instance cannot be evicted\non the basis of price, and only on the basis of availability. 
Further, the value should be greater than 0, or -1.', ) @@ -1955,25 +3214,32 @@ class Config: class PipelinesClustersClusterLogConfDbfs(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - destination: Optional[str] = Field( - None, description='dbfs destination, e.g. `dbfs:/my/path`' - ) + destination: str = Field(..., description='dbfs destination, e.g. `dbfs:/my/path`') class PipelinesClustersClusterLogConfS(BaseModel): class Config: extra = "forbid" + protected_namespaces = () canned_acl: Optional[str] = Field( None, description='(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_acl` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs.', ) - destination: Optional[str] = Field( - None, + destination: str = Field( + ..., description='S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs.', ) - enable_encryption: Optional[bool] = Field( + enable_encryption: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='(Optional) Flag to enable server side encryption, `false` by default.', ) @@ -1998,6 +3264,7 @@ class Config: class PipelinesClustersClusterLogConf(BaseModel): class Config: extra = "forbid" + protected_namespaces = () dbfs: Optional[PipelinesClustersClusterLogConfDbfs] = None s3: Optional[PipelinesClustersClusterLogConfS] = None @@ -2006,24 +3273,179 @@ class Config: class PipelinesClustersGcpAttributes(BaseModel): class Config: extra = "forbid" + protected_namespaces = () availability: Optional[str] = None - boot_disk_size: Optional[float] = Field(None, description='boot disk size in GB') + boot_disk_size: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field(None, description='boot disk size in GB') google_service_account: Optional[str] = Field( None, description='If provided, the cluster will impersonate the google service account when accessing\ngcloud services (like GCS). The google service account\nmust have previously been added to the Databricks environment by an account\nadministrator.', ) - local_ssd_count: Optional[float] = Field( + local_ssd_count: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='If provided, each node (workers and driver) in the cluster will have this number of local SSDs attached. Each local SSD is 375GB in size. 
Refer to [GCP documentation](https://cloud.google.com/compute/docs/disks/local-ssd#choose_number_local_ssds) for the supported number of local SSDs for each instance type.', ) + use_preemptible_executors: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( + None, + description='This field determines whether the spark executors will be scheduled to run on preemptible VMs (when set to true) versus standard compute engine VMs (when set to false; default).\nNote: Soon to be deprecated, use the availability field instead.', + ) + zone_id: Optional[str] = Field( + None, + description='Identifier for the availability zone in which the cluster resides.\nThis can be one of the following:\n- "HA" => High availability, spread nodes across availability zones for a Databricks deployment region [default]\n- "AUTO" => Databricks picks an availability zone to schedule the cluster on.\n- A GCP availability zone => Pick One of the available zones for (machine type + region) from https://cloud.google.com/compute/docs/regions-zones.', + ) + + +class PipelinesClustersInitScriptsAbfss(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + destination: str = Field( + ..., + description='abfss destination, e.g. `abfss://@.dfs.core.windows.net/`.', + ) + + +class PipelinesClustersInitScriptsDbfs(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + destination: str = Field(..., description='dbfs destination, e.g. `dbfs:/my/path`') + + +class PipelinesClustersInitScriptsFile(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + destination: str = Field( + ..., description='local file destination, e.g. `file:/my/local/file.sh`' + ) + + +class PipelinesClustersInitScriptsGcs(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + destination: str = Field( + ..., description='GCS destination/URI, e.g. `gs://my-bucket/some-prefix`' + ) + + +class PipelinesClustersInitScriptsS(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + canned_acl: Optional[str] = Field( + None, + description='(Optional) Set canned access control list for the logs, e.g. `bucket-owner-full-control`.\nIf `canned_cal` is set, please make sure the cluster iam role has `s3:PutObjectAcl` permission on\nthe destination bucket and prefix. The full list of possible canned acl can be found at\nhttp://docs.aws.amazon.com/AmazonS3/latest/dev/acl-overview.html#canned-acl.\nPlease also note that by default only the object owner gets full controls. If you are using cross account\nrole for writing data, you may want to set `bucket-owner-full-control` to make bucket owner able to\nread the logs.', + ) + destination: str = Field( + ..., + description='S3 destination, e.g. `s3://my-bucket/some-prefix` Note that logs will be delivered using\ncluster iam role, please make sure you set cluster iam role and the role has write access to the\ndestination. Please also note that you cannot use AWS keys to deliver logs.', + ) + enable_encryption: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( + None, + description='(Optional) Flag to enable server side encryption, `false` by default.', + ) + encryption_type: Optional[str] = Field( + None, + description='(Optional) The encryption type, it could be `sse-s3` or `sse-kms`. 
It will be used only when\nencryption is enabled and the default type is `sse-s3`.', + ) + endpoint: Optional[str] = Field( + None, + description='S3 endpoint, e.g. `https://s3-us-west-2.amazonaws.com`. Either region or endpoint needs to be set.\nIf both are set, endpoint will be used.', + ) + kms_key: Optional[str] = Field( + None, + description='(Optional) Kms key which will be used if encryption is enabled and encryption type is set to `sse-kms`.', + ) + region: Optional[str] = Field( + None, + description='S3 region, e.g. `us-west-2`. Either region or endpoint needs to be set. If both are set,\nendpoint will be used.', + ) + + +class PipelinesClustersInitScriptsVolumes(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + destination: str = Field( + ..., + description='Unity Catalog Volumes file destination, e.g. `/Volumes/my-init.sh`', + ) + + +class PipelinesClustersInitScriptsWorkspace(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + destination: str = Field( + ..., + description='workspace files destination, e.g. `/Users/user1@databricks.com/my-init.sh`', + ) + + +class PipelinesClustersInitScripts(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + abfss: Optional[PipelinesClustersInitScriptsAbfss] = None + dbfs: Optional[PipelinesClustersInitScriptsDbfs] = None + file: Optional[PipelinesClustersInitScriptsFile] = None + gcs: Optional[PipelinesClustersInitScriptsGcs] = None + s3: Optional[PipelinesClustersInitScriptsS] = None + volumes: Optional[PipelinesClustersInitScriptsVolumes] = None + workspace: Optional[PipelinesClustersInitScriptsWorkspace] = None class PipelinesClusters(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - apply_policy_default_values: Optional[bool] = Field( + apply_policy_default_values: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description="Note: This field won't be persisted. Only API users will check this field.", ) @@ -2041,6 +3463,10 @@ class Config: description='The node type of the Spark driver.\nNote that this field is optional; if unset, the driver node type will be set as the same value\nas `node_type_id` defined above.', ) gcp_attributes: Optional[PipelinesClustersGcpAttributes] = None + init_scripts: Optional[List[PipelinesClustersInitScripts]] = Field( + None, + description='The configuration for storing init scripts. Any number of destinations can be specified. The scripts are executed sequentially in the order provided. If `cluster_log_conf` is specified, init script logs are sent to `//init_scripts`.', + ) instance_pool_id: Optional[str] = Field( None, description='The optional ID of the instance pool to which the cluster belongs.', @@ -2053,7 +3479,14 @@ class Config: None, description='This field encodes, through a single value, the resources available to each of\nthe Spark nodes in this cluster. For example, the Spark nodes can be provisioned\nand optimized for memory or compute intensive workloads. A list of available node\ntypes can be retrieved by using the :method:clusters/listNodeTypes API call.\n', ) - num_workers: Optional[float] = Field( + num_workers: Optional[ + Union[ + float, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='Number of worker nodes that this cluster should have. 
A cluster has one Spark Driver\nand `num_workers` Executors for a total of `num_workers` + 1 Spark nodes.\n\nNote: When reading the properties of a cluster, this field reflects the desired number\nof workers rather than the actual current number of workers. For instance, if a cluster\nis resized from 5 to 10 workers, this field will immediately be updated to reflect\nthe target size of 10 workers, whereas the workers listed in `spark_info` will gradually\nincrease from 5 to 10 as the new nodes are provisioned.', ) @@ -2069,17 +3502,214 @@ class Config: ) +class PipelinesDeployment(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + kind: Optional[str] = Field( + None, description='The deployment method that manages the pipeline.' + ) + metadata_file_path: Optional[str] = Field( + None, + description='The path to the file containing metadata about the deployment.', + ) + + class PipelinesFilters(BaseModel): class Config: extra = "forbid" + protected_namespaces = () exclude: Optional[List[str]] = Field(None, description='Paths to exclude.') include: Optional[List[str]] = Field(None, description='Paths to include.') +class PipelinesGatewayDefinition(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + connection_id: Optional[str] = Field( + None, + description='Immutable. The Unity Catalog connection this gateway pipeline uses to communicate with the source.', + ) + gateway_storage_catalog: Optional[str] = Field( + None, + description="Required, Immutable. The name of the catalog for the gateway pipeline's storage location.", + ) + gateway_storage_name: Optional[str] = Field( + None, + description='Required. The Unity Catalog-compatible naming for the gateway storage location.\nThis is the destination to use for the data that is extracted by the gateway.\nDelta Live Tables system will automatically create the storage location under the catalog and schema.\n', + ) + gateway_storage_schema: Optional[str] = Field( + None, + description="Required, Immutable. The name of the schema for the gateway pipelines's storage location.", + ) + + +class PipelinesIngestionDefinitionObjectsSchemaTableConfiguration(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + primary_keys: Optional[List[str]] = Field( + None, description='The primary key of the table used to apply changes.' + ) + salesforce_include_formula_fields: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( + None, + description='If true, formula fields defined in the table are included in the ingestion. This setting is only valid for the Salesforce connector', + ) + scd_type: Optional[str] = Field( + None, description='The SCD type to use to ingest the table.' + ) + + +class PipelinesIngestionDefinitionObjectsSchema(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + destination_catalog: Optional[str] = Field( + None, description='Required. Destination catalog to store tables.' + ) + destination_schema: Optional[str] = Field( + None, + description='Required. Destination schema to store tables in. Tables with the same name as the source tables are created in this destination schema. The pipeline fails If a table with the same name already exists.', + ) + source_catalog: Optional[str] = Field( + None, + description='The source catalog name. 
Might be optional depending on the type of source.', + ) + source_schema: Optional[str] = Field( + None, description='Required. Schema name in the source database.' + ) + table_configuration: Optional[ + PipelinesIngestionDefinitionObjectsSchemaTableConfiguration + ] = None + + +class PipelinesIngestionDefinitionObjectsTableTableConfiguration(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + primary_keys: Optional[List[str]] = Field( + None, description='The primary key of the table used to apply changes.' + ) + salesforce_include_formula_fields: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( + None, + description='If true, formula fields defined in the table are included in the ingestion. This setting is only valid for the Salesforce connector', + ) + scd_type: Optional[str] = Field( + None, description='The SCD type to use to ingest the table.' + ) + + +class PipelinesIngestionDefinitionObjectsTable(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + destination_catalog: Optional[str] = Field( + None, description='Required. Destination catalog to store table.' + ) + destination_schema: Optional[str] = Field( + None, description='Required. Destination schema to store table.' + ) + destination_table: Optional[str] = Field( + None, + description='Optional. Destination table name. The pipeline fails If a table with that name already exists. If not set, the source table name is used.', + ) + source_catalog: Optional[str] = Field( + None, + description='Source catalog name. Might be optional depending on the type of source.', + ) + source_schema: Optional[str] = Field( + None, + description='Schema name in the source database. Might be optional depending on the type of source.', + ) + source_table: Optional[str] = Field( + None, description='Required. Table name in the source database.' + ) + table_configuration: Optional[ + PipelinesIngestionDefinitionObjectsTableTableConfiguration + ] = None + + +class PipelinesIngestionDefinitionObjects(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + schema_: Optional[PipelinesIngestionDefinitionObjectsSchema] = Field( + None, alias='schema' + ) + table: Optional[PipelinesIngestionDefinitionObjectsTable] = None + + +class PipelinesIngestionDefinitionTableConfiguration(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + primary_keys: Optional[List[str]] = Field( + None, description='The primary key of the table used to apply changes.' + ) + salesforce_include_formula_fields: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( + None, + description='If true, formula fields defined in the table are included in the ingestion. This setting is only valid for the Salesforce connector', + ) + scd_type: Optional[str] = Field( + None, description='The SCD type to use to ingest the table.' + ) + + +class PipelinesIngestionDefinition(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + connection_name: Optional[str] = Field( + None, + description='Immutable. The Unity Catalog connection this ingestion pipeline uses to communicate with the source. Specify either ingestion_gateway_id or connection_name.', + ) + ingestion_gateway_id: Optional[str] = Field( + None, + description='Immutable. 
Identifier for the ingestion gateway used by this ingestion pipeline to communicate with the source. Specify either ingestion_gateway_id or connection_name.', + ) + objects: Optional[List[PipelinesIngestionDefinitionObjects]] = Field( + None, + description='Required. Settings specifying tables to replicate and the destination for the replicated tables.', + ) + table_configuration: Optional[PipelinesIngestionDefinitionTableConfiguration] = None + + class PipelinesLibrariesFile(BaseModel): class Config: extra = "forbid" + protected_namespaces = () path: Optional[str] = Field(None, description='The absolute path of the file.') @@ -2087,6 +3717,7 @@ class Config: class PipelinesLibrariesMaven(BaseModel): class Config: extra = "forbid" + protected_namespaces = () coordinates: str = Field( ..., @@ -2105,6 +3736,7 @@ class Config: class PipelinesLibrariesNotebook(BaseModel): class Config: extra = "forbid" + protected_namespaces = () path: Optional[str] = Field(None, description='The absolute path of the notebook.') @@ -2112,6 +3744,7 @@ class Config: class PipelinesLibraries(BaseModel): class Config: extra = "forbid" + protected_namespaces = () file: Optional[PipelinesLibrariesFile] = None jar: Optional[str] = Field( @@ -2125,6 +3758,7 @@ class Config: class PipelinesNotifications(BaseModel): class Config: extra = "forbid" + protected_namespaces = () alerts: Optional[List[str]] = Field( None, @@ -2139,6 +3773,7 @@ class Config: class PipelinesPermissions(BaseModel): class Config: extra = "forbid" + protected_namespaces = () group_name: Optional[str] = None level: str @@ -2149,22 +3784,33 @@ class Config: class PipelinesTriggerCron(BaseModel): class Config: extra = "forbid" + protected_namespaces = () quartz_cron_schedule: Optional[str] = None timezone_id: Optional[str] = None +class PipelinesTriggerManual(BaseModel): + pass + + class Config: + extra = "forbid" + protected_namespaces = () + + class PipelinesTrigger(BaseModel): class Config: extra = "forbid" + protected_namespaces = () cron: Optional[PipelinesTriggerCron] = None - manual: Optional[Any] = None + manual: Optional[PipelinesTriggerManual] = None class Pipelines(BaseModel): class Config: extra = "forbid" + protected_namespaces = () catalog: Optional[str] = Field( None, @@ -2177,17 +3823,34 @@ class Config: None, description='Cluster settings for this pipeline deployment.' ) configuration: Optional[Dict[str, str]] = None - continuous: Optional[bool] = Field( + continuous: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='Whether the pipeline is continuous or triggered. This replaces `trigger`.', ) - development: Optional[bool] = Field( + deployment: Optional[PipelinesDeployment] = None + development: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='Whether the pipeline is in Development mode. Defaults to false.', ) edition: Optional[str] = Field(None, description='Pipeline product edition.') filters: Optional[PipelinesFilters] = None + gateway_definition: Optional[PipelinesGatewayDefinition] = None id: Optional[str] = Field(None, description='Unique identifier for this pipeline.') + ingestion_definition: Optional[PipelinesIngestionDefinition] = None libraries: Optional[List[PipelinesLibraries]] = Field( None, description='Libraries or code needed by this deployment.' 
) @@ -2198,10 +3861,22 @@ class Config: None, description='List of notification settings for this pipeline.' ) permissions: Optional[List[PipelinesPermissions]] = None - photon: Optional[bool] = Field( - None, description='Whether Photon is enabled for this pipeline.' - ) - serverless: Optional[bool] = Field( + photon: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field(None, description='Whether Photon is enabled for this pipeline.') + serverless: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = Field( None, description='Whether serverless compute is enabled for this pipeline.' ) storage: Optional[str] = Field( @@ -2217,6 +3892,7 @@ class Config: class RegisteredModelsGrants(BaseModel): class Config: extra = "forbid" + protected_namespaces = () principal: str privileges: List[str] @@ -2225,6 +3901,7 @@ class Config: class RegisteredModels(BaseModel): class Config: extra = "forbid" + protected_namespaces = () catalog_name: str = Field( ..., @@ -2247,6 +3924,7 @@ class Config: class Resources(BaseModel): class Config: extra = "forbid" + protected_namespaces = () experiments: Optional[Dict[str, Experiments]] = None jobs: Optional[Dict[str, Jobs]] = None @@ -2259,6 +3937,7 @@ class Config: class RunAs(BaseModel): class Config: extra = "forbid" + protected_namespaces = () service_principal_name: Optional[str] = None user_name: Optional[str] = None @@ -2267,69 +3946,85 @@ class Config: class Sync(BaseModel): class Config: extra = "forbid" + protected_namespaces = () exclude: Optional[List[str]] = None include: Optional[List[str]] = None +class VariablesLookup(BaseModel): + class Config: + extra = "forbid" + protected_namespaces = () + + alert: Optional[str] = None + cluster: Optional[str] = None + cluster_policy: Optional[str] = None + dashboard: Optional[str] = None + instance_pool: Optional[str] = None + job: Optional[str] = None + metastore: Optional[str] = None + pipeline: Optional[str] = None + query: Optional[str] = None + service_principal: Optional[str] = None + warehouse: Optional[str] = None + + class Variables(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - default: Optional[str] = None + default: Optional[Union[str, bool, float, int]] = None description: Optional[str] = None + lookup: Optional[VariablesLookup] = None class Workspace(BaseModel): class Config: extra = "forbid" + protected_namespaces = () - artifact_path: Optional[str] = Field( - None, - description='The remote path to synchronize build artifacts to. This defaults to `${workspace.root}/artifacts`', - ) + artifact_path: Optional[str] = None auth_type: Optional[str] = None azure_client_id: Optional[str] = None - azure_environment: Optional[str] = Field( - None, description='Azure environment, one of (Public, UsGov, China, Germany).' - ) - azure_login_app_id: Optional[str] = Field( - None, description='Azure Login Application ID.' - ) + azure_environment: Optional[str] = None + azure_login_app_id: Optional[str] = None azure_tenant_id: Optional[str] = None - azure_use_msi: Optional[bool] = None - azure_workspace_resource_id: Optional[str] = Field( - None, description='Azure Resource Manager ID for Azure Databricks workspace.' 
- ) + azure_use_msi: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = None + azure_workspace_resource_id: Optional[str] = None client_id: Optional[str] = None - file_path: Optional[str] = Field( - None, - description='The remote path to synchronize local files artifacts to. This defaults to `${workspace.root}/files`', - ) + file_path: Optional[str] = None google_service_account: Optional[str] = None - host: Optional[str] = Field(None, description='Host url of the workspace.') - profile: Optional[str] = Field( - None, - description='Connection profile to use. By default profiles are specified in ~/.databrickscfg.', - ) - root_path: Optional[str] = Field( - None, - description='The base location for synchronizing files, artifacts and state. Defaults to `/Users/jane@doe.com/.bundle/${bundle.name}/${bundle.target}`', - ) - state_path: Optional[str] = Field( - None, - description='The remote path to synchronize bundle state to. This defaults to `${workspace.root}/state`', - ) + host: Optional[str] = None + profile: Optional[str] = None + root_path: Optional[str] = None + state_path: Optional[str] = None class Targets(BaseModel): class Config: extra = "forbid" + protected_namespaces = () artifacts: Optional[Dict[str, Artifacts]] = None bundle: Optional[Bundle] = None compute_id: Optional[str] = None - default: Optional[bool] = None + default: Optional[ + Union[ + bool, + constr( + pattern=r'\$\{([a-zA-Z]+([-_]?[a-zA-Z0-9]+)*(\.[a-zA-Z]+([-_]?[a-zA-Z0-9]+)*)*)\}' + ), + ] + ] = None git: Optional[Git] = None mode: Optional[str] = None permissions: Optional[List[Permissions]] = None @@ -2343,14 +4038,12 @@ class Config: class DatabricksAssetBundles(BaseModel): class Config: extra = "forbid" + protected_namespaces = () artifacts: Optional[Dict[str, Artifacts]] = None bundle: Optional[Bundle] = None experimental: Optional[Experimental] = None - include: Optional[List[str]] = Field( - None, - description='A list of glob patterns of files to load and merge into the this configuration. 
Defaults to no files being included.', - ) + include: Optional[List[str]] = None permissions: Optional[List[Permissions]] = None resources: Optional[Resources] = None run_as: Optional[RunAs] = None diff --git a/tests/engine/test_utils.py b/tests/engine/test_utils.py index e10fd71b..64376413 100644 --- a/tests/engine/test_utils.py +++ b/tests/engine/test_utils.py @@ -57,7 +57,7 @@ def test_get_job_id_non_200(self, caplog, api): def test_get_bf_project_root(self): # Set up expected path which is the root of the repo - expected_root = pathlib.Path("/__w/brickflow") + expected_root = pathlib.Path.cwd().parents[0] # Execute the function actual_root = get_bf_project_root() # Assert the result diff --git a/tools/modify_model.py b/tools/modify_model.py index c9e34817..928f9c86 100644 --- a/tools/modify_model.py +++ b/tools/modify_model.py @@ -27,7 +27,13 @@ def remove_timestamp_line(input_code: str) -> str: def replace_class_config_extras(input_code: str) -> str: pattern = r"extra\s*=\s*Extra\.forbid" - return re.sub(pattern, 'extra = "forbid"', input_code) + return re.sub( + pattern, 'extra = "forbid"\n protected_namespaces = ()', input_code + ) + + def replace_regex_with_pattern(input_code: str) -> str: + pattern = r"regex=" + return re.sub(pattern, "pattern=", input_code) with open(file_path, "r") as f: lines = f.readlines() @@ -49,4 +55,6 @@ def replace_class_config_extras(input_code: str) -> str: data = remove_timestamp_line(data) # remove extra config to remove deprecation warning data = replace_class_config_extras(data) + # replace regex with pattern + data = replace_regex_with_pattern(data) w.write(data)
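Reviewer note: the recurring `Union[<literal>, constr(pattern=r'\$\{...\}')]` type in the regenerated model is what lets each scalar field accept either a literal value or a `${...}` reference, which is presumably left as a plain string for the Databricks CLI to interpolate at deploy time. A minimal sketch of that behaviour, assuming pydantic v2 and a checkout that includes this regenerated brickflow/bundles/model.py; `var.max_workers` is a made-up variable name used only for illustration:

from pydantic import ValidationError

from brickflow.bundles.model import (
    ModelServingEndpointsRateLimits,
    PipelinesClustersAutoscale,
)

# A literal number and a ${...} bundle-variable reference both validate.
PipelinesClustersAutoscale(min_workers=1, max_workers=4)
PipelinesClustersAutoscale(min_workers=1, max_workers="${var.max_workers}")

# Required fields such as `calls` follow the same pattern.
ModelServingEndpointsRateLimits(calls=100, key="user", renewal_period="minute")

# Anything that is neither a number nor a ${...} reference is rejected.
try:
    PipelinesClustersAutoscale(min_workers=1, max_workers="a lot")
except ValidationError as exc:
    print(exc)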
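Pipeline clusters pick up `init_scripts` in this schema version, with one destination model per storage backend (abfss, dbfs, file, gcs, s3, volumes, workspace). A small usage sketch, assuming pydantic v2 and that the remaining `PipelinesClusters` fields stay optional as in the generated schema; the paths and sizing below are placeholder values:

from brickflow.bundles.model import (
    PipelinesClusters,
    PipelinesClustersInitScripts,
    PipelinesClustersInitScriptsVolumes,
    PipelinesClustersInitScriptsWorkspace,
)

# Two init scripts, executed in the order given: one from a Unity Catalog
# volume, one from the workspace file tree.
cluster = PipelinesClusters(
    num_workers=2,
    init_scripts=[
        PipelinesClustersInitScripts(
            volumes=PipelinesClustersInitScriptsVolumes(
                destination="/Volumes/main/default/scripts/install-deps.sh"
            )
        ),
        PipelinesClustersInitScripts(
            workspace=PipelinesClustersInitScriptsWorkspace(
                destination="/Users/someone@example.com/install-deps.sh"
            )
        ),
    ],
)
print(cluster.model_dump(exclude_none=True))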
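Bundle `variables` also change shape: `default` now accepts str/bool/float/int, and the new `lookup` block names an existing workspace object (cluster, instance pool, warehouse, and so on) that the CLI can resolve for you. A sketch with placeholder names:

from brickflow.bundles.model import Variables, VariablesLookup

variables = {
    # Non-string default, new in this schema version.
    "max_workers": Variables(default=4, description="Upper bound for autoscaling"),
    # Resolved from an existing instance pool; "shared-pool" is a made-up name.
    "pool": Variables(lookup=VariablesLookup(instance_pool="shared-pool")),
}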
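The two helpers added to tools/modify_model.py are easiest to see on a small input. Below is a trimmed-down re-creation of just those two regex passes (the full script also strips the generator timestamp and rewrites the generated file); the `Example` snippet is invented for illustration and is not a real class from the model:

import re


def replace_class_config_extras(input_code: str) -> str:
    # 'extra = Extra.forbid' -> string form, plus the pydantic v2
    # protected_namespaces override that silences warnings about
    # field names starting with "model_".
    return re.sub(
        r"extra\s*=\s*Extra\.forbid",
        'extra = "forbid"\n        protected_namespaces = ()',
        input_code,
    )


def replace_regex_with_pattern(input_code: str) -> str:
    # datamodel-codegen emits constr(regex=...); pydantic v2 expects pattern=.
    return re.sub(r"regex=", "pattern=", input_code)


generated = """class Example(BaseModel):
    class Config:
        extra = Extra.forbid

    name: constr(regex=r'^[a-z]+$')
"""

# -> Config gains protected_namespaces = () and regex= becomes pattern=.
print(replace_regex_with_pattern(replace_class_config_extras(generated)))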