diff --git a/airflow/providers/google/ads/hooks/ads.py b/airflow/providers/google/ads/hooks/ads.py index daf80e3cd314..fd68d1ae53dd 100644 --- a/airflow/providers/google/ads/hooks/ads.py +++ b/airflow/providers/google/ads/hooks/ads.py @@ -36,14 +36,13 @@ class GoogleAdsHook(BaseHook): - """ - Hook for the Google Ads API. + """Interact with Google Ads API. This hook requires two connections: - - gcp_conn_id - provides service account details (like any other GCP connection) - - google_ads_conn_id - which contains information from Google Ads config.yaml file - in the ``extras``. Example of the ``extras``: + - gcp_conn_id - provides service account details (like any other GCP connection) + - google_ads_conn_id - which contains information from Google Ads config.yaml file + in the ``extras``. Example of the ``extras``: .. code-block:: json @@ -69,8 +68,6 @@ class GoogleAdsHook(BaseHook): :param gcp_conn_id: The connection ID with the service account details. :param google_ads_conn_id: The connection ID with the details of Google Ads config.yaml file. :param api_version: The Google Ads API version to use. - - :return: list of Google Ads Row object(s) """ default_api_version = "v14" @@ -90,10 +87,10 @@ def __init__( def search( self, client_ids: list[str], query: str, page_size: int = 10000, **kwargs ) -> list[GoogleAdsRow]: - """ - Pulls data from the Google Ads API and returns it as native protobuf - message instances (those seen in versions prior to 10.0.0 of the - google-ads library). + """Pull data from the Google Ads API. + + Native protobuf message instances are returned (those seen in versions + prior to 10.0.0 of the google-ads library). This method is for backwards compatibility with older versions of the google_ads_hook. @@ -105,7 +102,7 @@ def search( :param client_ids: Google Ads client ID(s) to query the API for. :param query: Google Ads Query Language query. :param page_size: Number of results to return per page. Max 10000. - :return: Google Ads API response, converted to Google Ads Row objects + :return: Google Ads API response, converted to Google Ads Row objects. """ data_proto_plus = self._search(client_ids, query, page_size, **kwargs) data_native_pb = [row._pb for row in data_proto_plus] @@ -115,9 +112,10 @@ def search( def search_proto_plus( self, client_ids: list[str], query: str, page_size: int = 10000, **kwargs ) -> list[GoogleAdsRow]: - """ - Pulls data from the Google Ads API and returns it as proto-plus-python - message instances that behave more like conventional python objects. + """Pull data from the Google Ads API. + + Instances of proto-plus-python message are returned, which behave more + like conventional Python objects. :param client_ids: Google Ads client ID(s) to query the API for. :param query: Google Ads Query Language query. @@ -127,12 +125,14 @@ def search_proto_plus( return self._search(client_ids, query, page_size, **kwargs) def list_accessible_customers(self) -> list[str]: - """ - Returns resource names of customers directly accessible by the user authenticating the call. - The resulting list of customers is based on your OAuth credentials. The request returns a list - of all accounts that you are able to act upon directly given your current credentials. This will - not necessarily include all accounts within the account hierarchy; rather, it will only include - accounts where your authenticated user has been added with admin or other rights in the account. + """List resource names of customers. 
+ + The resulting list of customers is based on your OAuth credentials. The + request returns a list of all accounts that you are able to act upon + directly given your current credentials. This will not necessarily + include all accounts within the account hierarchy; rather, it will only + include accounts where your authenticated user has been added with admin + or other rights in the account. ..seealso:: https://developers.google.com/google-ads/api/reference/rpc @@ -152,7 +152,7 @@ def list_accessible_customers(self) -> list[str]: @cached_property def _get_service(self) -> GoogleAdsServiceClient: - """Connects and authenticates with the Google Ads API using a service account.""" + """Connect and authenticate with the Google Ads API using a service account.""" client = self._get_client return client.get_service("GoogleAdsService", version=self.api_version) @@ -170,7 +170,7 @@ def _get_client(self) -> GoogleAdsClient: @cached_property def _get_customer_service(self) -> CustomerServiceClient: - """Connects and authenticates with the Google Ads API using a service account.""" + """Connect and authenticate with the Google Ads API using a service account.""" with NamedTemporaryFile("w", suffix=".json") as secrets_temp: self._get_config() self._update_config_with_secret(secrets_temp) @@ -182,9 +182,10 @@ def _get_customer_service(self) -> CustomerServiceClient: raise def _get_config(self) -> None: - """ - Gets google ads connection from meta db and sets google_ads_config attribute with returned config - file. + """Set up Google Ads config from Connection. + + This pulls the connections from db, and uses it to set up + ``google_ads_config``. """ conn = self.get_connection(self.google_ads_conn_id) if "google_ads_client" not in conn.extra_dejson: @@ -193,10 +194,11 @@ def _get_config(self) -> None: self.google_ads_config = conn.extra_dejson["google_ads_client"] def _update_config_with_secret(self, secrets_temp: IO[str]) -> None: - """ - Gets Google Cloud secret from connection and saves the contents to the temp file - Updates google ads config with file path of the temp file containing the secret - Note, the secret must be passed as a file path for Google Ads API. + """Set up Google Cloud config secret from Connection. + + This pulls the connection, saves the contents to a temp file, and point + the config to the path containing the secret. Note that the secret must + be passed as a file path for Google Ads API. """ extras = self.get_connection(self.gcp_conn_id).extra_dejson secret = get_field(extras, "keyfile_dict") @@ -210,8 +212,7 @@ def _update_config_with_secret(self, secrets_temp: IO[str]) -> None: def _search( self, client_ids: list[str], query: str, page_size: int = 10000, **kwargs ) -> list[GoogleAdsRow]: - """ - Pulls data from the Google Ads API. + """Pull data from the Google Ads API. :param client_ids: Google Ads client ID(s) to query the API for. :param query: Google Ads Query Language query. @@ -231,11 +232,9 @@ def _search( return self._extract_rows(iterators) def _extract_rows(self, iterators: list[GRPCIterator]) -> list[GoogleAdsRow]: - """ - Convert Google Page Iterator (GRPCIterator) objects to Google Ads Rows. + """Convert Google Page Iterator (GRPCIterator) objects to Google Ads Rows. 
:param iterators: List of Google Page Iterator (GRPCIterator) objects - :return: API response for all clients in the form of Google Ads Row object(s) """ try: diff --git a/airflow/providers/google/ads/transfers/ads_to_gcs.py b/airflow/providers/google/ads/transfers/ads_to_gcs.py index 1d4e96f098e7..2483c42dad6c 100644 --- a/airflow/providers/google/ads/transfers/ads_to_gcs.py +++ b/airflow/providers/google/ads/transfers/ads_to_gcs.py @@ -30,10 +30,10 @@ class GoogleAdsToGcsOperator(BaseOperator): - """ - Fetches the daily results from the Google Ads API for 1-n clients - Converts and saves the data as a temporary CSV file - Uploads the CSV to Google Cloud Storage. + """Fetch daily results from the Google Ads API for 1-n clients. + + Converts and saves the data as a temporary CSV file, then uploads the CSV + to Google Cloud Storage. .. seealso:: For more information on the Google Ads API, take a look at the API docs: diff --git a/airflow/providers/google/cloud/_internal_client/secret_manager_client.py b/airflow/providers/google/cloud/_internal_client/secret_manager_client.py index 5366e7a9ea30..0d346e029420 100644 --- a/airflow/providers/google/cloud/_internal_client/secret_manager_client.py +++ b/airflow/providers/google/cloud/_internal_client/secret_manager_client.py @@ -30,11 +30,11 @@ class _SecretManagerClient(LoggingMixin): - """ - Retrieves Secrets object from Google Cloud Secrets Manager. This is a common class reused between - SecretsManager and Secrets Hook that provides the shared authentication and verification mechanisms. - This class should not be used directly, use SecretsManager or SecretsHook instead. + """Retrieve Secrets object from Google Cloud Secrets Manager. + This is a common class reused between SecretsManager and Secrets Hook that + provides the shared authentication and verification mechanisms. This class + should not be used directly; use SecretsManager or SecretsHook instead. :param credentials: Credentials used to authenticate to GCP """ @@ -48,11 +48,9 @@ def __init__( @staticmethod def is_valid_secret_name(secret_name: str) -> bool: - """ - Returns true if the secret name is valid. + """Whether the secret name is valid. :param secret_name: name of the secret - :return: """ return bool(re.match(SECRET_ID_PATTERN, secret_name)) @@ -63,8 +61,7 @@ def client(self) -> SecretManagerServiceClient: return _client def get_secret(self, secret_id: str, project_id: str, secret_version: str = "latest") -> str | None: - """ - Get secret value from the Secret Manager. + """Get secret value from the Secret Manager. :param secret_id: Secret Key :param project_id: Project id to use diff --git a/airflow/providers/google/cloud/hooks/bigquery.py b/airflow/providers/google/cloud/hooks/bigquery.py index d46389fa537e..f76fd1b1e1a8 100644 --- a/airflow/providers/google/cloud/hooks/bigquery.py +++ b/airflow/providers/google/cloud/hooks/bigquery.py @@ -15,10 +15,9 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -""" -This module contains a BigQuery Hook, as well as a very basic PEP 249 -implementation for BigQuery. -""" + +"""BigQuery Hook and a very basic PEP 249 implementation for BigQuery.""" + from __future__ import annotations import json @@ -76,8 +75,9 @@ class BigQueryHook(GoogleBaseHook, DbApiHook): - """ - Interact with BigQuery. This hook uses the Google Cloud connection. + """Interact with BigQuery. + + This hook uses the Google Cloud connection. 
:param gcp_conn_id: The Airflow connection used for GCP credentials. :param use_legacy_sql: This specifies whether to use legacy SQL dialect. @@ -85,9 +85,10 @@ class BigQueryHook(GoogleBaseHook, DbApiHook): :param priority: Specifies a priority for the query. Possible values include INTERACTIVE and BATCH. The default value is INTERACTIVE. - :param api_resource_configs: This contains params configuration applied for Google BigQuery jobs. - :param impersonation_chain: This is the optional service account to impersonate using short term - credentials. + :param api_resource_configs: This contains params configuration applied for + Google BigQuery jobs. + :param impersonation_chain: This is the optional service account to + impersonate using short term credentials. :param labels: The BigQuery resource label. """ @@ -125,7 +126,7 @@ def __init__( self.credentials_path = "bigquery_hook_credentials.json" def get_conn(self) -> BigQueryConnection: - """Returns a BigQuery PEP 249 connection object.""" + """Get a BigQuery PEP 249 connection object.""" service = self.get_service() return BigQueryConnection( service=service, @@ -137,7 +138,7 @@ def get_conn(self) -> BigQueryConnection: ) def get_service(self) -> Resource: - """Returns a BigQuery service object.""" + """Get a BigQuery service object. Deprecated.""" warnings.warn( "This method will be deprecated. Please use `BigQueryHook.get_client` method", AirflowProviderDeprecationWarning, @@ -146,12 +147,10 @@ def get_service(self) -> Resource: return build("bigquery", "v2", http=http_authorized, cache_discovery=False) def get_client(self, project_id: str | None = None, location: str | None = None) -> Client: - """ - Returns authenticated BigQuery Client. + """Get an authenticated BigQuery Client. :param project_id: Project ID for the project which the client acts on behalf of. :param location: Default location for jobs / datasets / tables. - :return: """ return Client( client_info=CLIENT_INFO, @@ -161,15 +160,13 @@ def get_client(self, project_id: str | None = None, location: str | None = None) ) def get_uri(self) -> str: - """Override DbApiHook get_uri method for get_sqlalchemy_engine().""" + """Override from ``DbApiHook`` for ``get_sqlalchemy_engine()``.""" return f"bigquery://{self.project_id}" def get_sqlalchemy_engine(self, engine_kwargs=None): - """ - Get an sqlalchemy_engine object. + """Create an SQLAlchemy engine object. :param engine_kwargs: Kwargs used in :func:`~sqlalchemy.create_engine`. - :return: the created engine. """ if engine_kwargs is None: engine_kwargs = {} @@ -233,7 +230,8 @@ def insert_rows( replace: Any = False, **kwargs, ) -> None: - """ + """Insert rows. + Insertion is currently unsupported. Theoretically, you could use BigQuery's streaming API to insert rows into a table, but this hasn't been implemented. @@ -247,14 +245,14 @@ def get_pandas_df( dialect: str | None = None, **kwargs, ) -> DataFrame: - """ - Returns a Pandas DataFrame for the results produced by a BigQuery - query. The DbApiHook method must be overridden because Pandas - doesn't support PEP 249 connections, except for SQLite. + """Get a Pandas DataFrame for the BigQuery results. + + The DbApiHook method must be overridden because Pandas doesn't support + PEP 249 connections, except for SQLite. - See: - https://github.com/pandas-dev/pandas/blob/055d008615272a1ceca9720dc365a2abd316f353/pandas/io/sql.py#L415 - https://github.com/pandas-dev/pandas/issues/6900 + .. 
seealso:: + https://github.com/pandas-dev/pandas/blob/055d008615272a1ceca9720dc365a2abd316f353/pandas/io/sql.py#L415 + https://github.com/pandas-dev/pandas/issues/6900 :param sql: The BigQuery SQL to execute. :param parameters: The parameters to render the SQL query with (not @@ -274,8 +272,7 @@ def get_pandas_df( @GoogleBaseHook.fallback_to_default_project_id def table_exists(self, dataset_id: str, table_id: str, project_id: str) -> bool: - """ - Checks for the existence of a table in Google BigQuery. + """Check if a table exists in Google BigQuery. :param project_id: The Google cloud project in which to look for the table. The connection supplied to the hook must provide access to @@ -295,8 +292,7 @@ def table_exists(self, dataset_id: str, table_id: str, project_id: str) -> bool: def table_partition_exists( self, dataset_id: str, table_id: str, partition_id: str, project_id: str ) -> bool: - """ - Checks for the existence of a partition in a table in Google BigQuery. + """Check if a partition exists in Google BigQuery. :param project_id: The Google cloud project in which to look for the table. The connection supplied to the hook must provide access to @@ -330,9 +326,10 @@ def create_empty_table( location: str | None = None, exists_ok: bool = True, ) -> Table: - """ - Creates a new, empty table in the dataset. - To create a view, which is defined by a SQL query, parse a dictionary to 'view' kwarg. + """Create a new, empty table in the dataset. + + To create a view, which is defined by a SQL query, parse a dictionary to + the *view* argument. :param project_id: The project to create the table into. :param dataset_id: The dataset to create the table into. @@ -342,14 +339,16 @@ def create_empty_table( If provided all other parameters are ignored. :param schema_fields: If set, the schema field list as defined here: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema - :param labels: a dictionary containing labels for the table, passed to BigQuery - :param retry: Optional. How to retry the RPC. - **Example**: :: + .. code-block:: python - schema_fields=[{"name": "emp_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"}] + schema_fields = [ + {"name": "emp_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"}, + ] + :param labels: a dictionary containing labels for the table, passed to BigQuery + :param retry: Optional. How to retry the RPC. :param time_partitioning: configure optional time partitioning fields i.e. partition by field, type and expiration as per API specifications. @@ -363,20 +362,22 @@ def create_empty_table( If set, it will create a view instead of a table: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ViewDefinition - **Example**: :: + .. code-block:: python - view = { - "query": "SELECT * FROM `test-project-id.test_dataset_id.test_table_prefix*` LIMIT 1000", - "useLegacySql": False - } + view = { + "query": "SELECT * FROM `test-project-id.test_dataset_id.test_table_prefix*` LIMIT 1000", + "useLegacySql": False, + } :param materialized_view: [Optional] The materialized view definition. :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys). - **Example**: :: + + .. 
code-block:: python encryption_configuration = { - "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key" + "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key", } + :param num_retries: Maximum number of retries in case of connection problems. :param location: (Optional) The geographic location where the table should reside. :param exists_ok: If ``True``, ignore "already exists" errors when creating the table. @@ -429,10 +430,9 @@ def create_empty_dataset( dataset_reference: dict[str, Any] | None = None, exists_ok: bool = True, ) -> dict[str, Any]: - """ - Create a new empty dataset. + """Create a new empty dataset. - See: https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/insert. + .. seealso:: https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/insert :param project_id: The name of the project where we want to create an empty a dataset. Don't need to provide, if projectId in dataset_reference. @@ -491,8 +491,7 @@ def get_dataset_tables( max_results: int | None = None, retry: Retry = DEFAULT_RETRY, ) -> list[dict[str, Any]]: - """ - Get the list of tables for a given dataset. + """Get the list of tables for a given dataset. For more information, see: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/list @@ -521,8 +520,7 @@ def delete_dataset( delete_contents: bool = False, retry: Retry = DEFAULT_RETRY, ) -> None: - """ - Delete a dataset of Big query in your project. + """Delete a dataset of Big query in your project. :param project_id: The name of the project where we have the dataset. :param dataset_id: The dataset to be delete. @@ -562,17 +560,13 @@ def create_external_table( location: str | None = None, project_id: str | None = None, ) -> Table: - """ - Creates a new external table in the dataset with the data from Google - Cloud Storage. - - See here: - https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#resource + """Create an external table in the dataset with data from Google Cloud Storage. - This method is deprecated. - Please use `BigQueryHook.create_empty_table` method with passing the `table_resource` object + .. seealso:: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#resource - for more details about these parameters. + This method is deprecated. Please use :func:`.create_empty_table` with + the ``table_resource`` object. See function documentation for more + details about these parameters. :param external_project_dataset_table: The dotted ``(.|:).($)`` BigQuery @@ -618,10 +612,11 @@ def create_external_table( :param labels: A dictionary containing labels for the BiqQuery table. :param description: A string containing the description for the BigQuery table. :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys). - **Example**: :: + + .. code-block:: python encryption_configuration = { - "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key" + "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key", } """ warnings.warn( @@ -706,8 +701,7 @@ def update_table( table_id: str | None = None, project_id: str | None = None, ) -> dict[str, Any]: - """ - Change some fields of a table. + """Change some fields of a table. Use ``fields`` to specify which fields to update. At least one field must be provided. 
If a field is listed in ``fields`` and is ``None`` @@ -757,11 +751,10 @@ def patch_table( require_partition_filter: bool | None = None, encryption_configuration: dict | None = None, ) -> None: - """ - Patch information in an existing table. - It only updates fields that are provided in the request object. + """Patch information in an existing table. - This method is deprecated. Please use `BigQueryHook.update_table` + It only updates fields that are provided in the request object. This + method is deprecated. Please use :func:`.update_table` instead. Reference: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/patch @@ -779,30 +772,35 @@ def patch_table( https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema The supported schema modifications and unsupported schema modification are listed here: https://cloud.google.com/bigquery/docs/managing-table-schemas - **Example**: :: - schema=[{"name": "emp_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"}] + .. code-block:: python + + schema = [ + {"name": "emp_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"}, + ] :param time_partitioning: [Optional] A dictionary containing time-based partitioning definition for the table. :param view: [Optional] A dictionary containing definition for the view. If set, it will patch a view instead of a table: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ViewDefinition - **Example**: :: + + .. code-block:: python view = { "query": "SELECT * FROM `test-project-id.test_dataset_id.test_table_prefix*` LIMIT 500", - "useLegacySql": False + "useLegacySql": False, } :param require_partition_filter: [Optional] If true, queries over the this table require a partition filter. If false, queries over the table :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys). - **Example**: :: + + .. code-block:: python encryption_configuration = { - "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key" + "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key", } """ @@ -852,9 +850,7 @@ def insert_all( skip_invalid_rows: bool = False, fail_on_error: bool = False, ) -> None: - """ - Method to stream data into BigQuery one record at a time without needing - to run a load job. + """Stream data into BigQuery one record at a time without a load job. .. seealso:: For more information, see: @@ -865,8 +861,9 @@ def insert_all( :param table_id: The name of the table :param rows: the rows to insert - **Example or rows**: - rows=[{"json": {"a_key": "a_value_0"}}, {"json": {"a_key": "a_value_1"}}] + .. code-block:: python + + rows = [{"json": {"a_key": "a_value_0"}}, {"json": {"a_key": "a_value_1"}}] :param ignore_unknown_values: [Optional] Accept rows that contain values that do not match the schema. The unknown values are ignored. @@ -906,8 +903,7 @@ def update_dataset( project_id: str | None = None, retry: Retry = DEFAULT_RETRY, ) -> Dataset: - """ - Change some fields of a dataset. + """Change some fields of a dataset. Use ``fields`` to specify which fields to update. At least one field must be provided. If a field is listed in ``fields`` and is ``None`` in @@ -945,11 +941,11 @@ def update_dataset( return dataset def patch_dataset(self, dataset_id: str, dataset_resource: dict, project_id: str | None = None) -> dict: - """ - Patches information in an existing dataset. 
+ """Patches information in an existing dataset. + It only replaces fields that are provided in the submitted dataset resource. - This method is deprecated. Please use `update_dataset` + This method is deprecated. Please use :func:`.update_dataset` instead. More info: https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/patch @@ -993,11 +989,11 @@ def get_dataset_tables_list( table_prefix: str | None = None, max_results: int | None = None, ) -> list[dict[str, Any]]: - """ - Method returns tables list of a BigQuery tables. If table prefix is specified, - only tables beginning by it are returned. + """List tables of a BigQuery dataset. - This method is deprecated. Please use `get_dataset_tables` + If a table prefix is specified, only tables beginning by it are + returned. This method is deprecated. Please use + :func:`.get_dataset_tables` instead. For more information, see: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/list @@ -1037,8 +1033,7 @@ def get_datasets_list( retry: Retry = DEFAULT_RETRY, return_iterator: bool = False, ) -> list[DatasetListItem] | HTTPIterator: - """ - Method returns full list of BigQuery datasets in the current project. + """Get all BigQuery datasets in the current project. For more information, see: https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/list @@ -1081,16 +1076,15 @@ def get_datasets_list( @GoogleBaseHook.fallback_to_default_project_id def get_dataset(self, dataset_id: str, project_id: str | None = None) -> Dataset: - """ - Fetch the dataset referenced by dataset_id. + """Fetch the dataset referenced by *dataset_id*. :param dataset_id: The BigQuery Dataset ID :param project_id: The Google Cloud Project ID :return: dataset_resource - .. seealso:: - For more information, see Dataset Resource content: - https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource + .. seealso:: + For more information, see Dataset Resource content: + https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource """ dataset = self.get_client(project_id=project_id).get_dataset( dataset_ref=DatasetReference(project_id, dataset_id) @@ -1107,10 +1101,10 @@ def run_grant_dataset_view_access( view_project: str | None = None, project_id: str | None = None, ) -> dict[str, Any]: - """ - Grant authorized view access of a dataset to a view table. + """Grant authorized view access of a dataset to a view table. + If this view has already been granted access to the dataset, do nothing. - This method is not atomic. Running it may clobber a simultaneous update. + This method is not atomic. Running it may clobber a simultaneous update. :param source_dataset: the source dataset :param view_dataset: the dataset that the view is in @@ -1159,8 +1153,8 @@ def run_grant_dataset_view_access( def run_table_upsert( self, dataset_id: str, table_resource: dict[str, Any], project_id: str | None = None ) -> dict[str, Any]: - """ - If the table already exists, update the existing table if not create new. + """Update a table if it exists, otherwise create a new one. + Since BigQuery does not natively allow table upserts, this is not an atomic operation. @@ -1169,7 +1163,6 @@ def run_table_upsert( https://cloud.google.com/bigquery/docs/reference/v2/tables#resource :param project_id: the project to upsert the table into. If None, project will be self.project_id. 
- :return: """ table_id = table_resource["tableReference"]["tableId"] table_resource = self._resolve_table_reference( @@ -1188,12 +1181,12 @@ def run_table_upsert( return table def run_table_delete(self, deletion_dataset_table: str, ignore_if_missing: bool = False) -> None: - """ - Delete an existing table from the dataset; - If the table does not exist, return an error unless ignore_if_missing + """Delete an existing table from the dataset. + + If the table does not exist, return an error unless *ignore_if_missing* is set to True. - This method is deprecated. Please use `delete_table` + This method is deprecated. Please use :func:`.delete_table` instead. :param deletion_dataset_table: A dotted ``(.|:).
`` that indicates which table @@ -1214,9 +1207,10 @@ def delete_table( not_found_ok: bool = True, project_id: str | None = None, ) -> None: - """ - Delete an existing table from the dataset. If the table does not exist, return an error - unless not_found_ok is set to True. + """Delete an existing table from the dataset. + + If the table does not exist, return an error unless *not_found_ok* is + set to True. :param table_id: A dotted ``(.|:).
`` that indicates which table will be deleted. @@ -1239,12 +1233,11 @@ def get_tabledata( page_token: str | None = None, start_index: int | None = None, ) -> list[dict]: - """ - Get the data of a given dataset.table and optionally with selected columns. + """Get data from given table. - This method is deprecated. Please use `list_rows` + This method is deprecated. Please use :func:`.list_rows` instead. - see https://cloud.google.com/bigquery/docs/reference/v2/tabledata/list + .. seealso:: https://cloud.google.com/bigquery/docs/reference/v2/tabledata/list :param dataset_id: the dataset ID of the requested table. :param table_id: the table ID of the requested table. @@ -1281,8 +1274,7 @@ def list_rows( retry: Retry = DEFAULT_RETRY, return_iterator: bool = False, ) -> list[Row] | RowIterator: - """ - List the rows of the table. + """List rows in a table. See https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/list @@ -1331,10 +1323,9 @@ def list_rows( @GoogleBaseHook.fallback_to_default_project_id def get_schema(self, dataset_id: str, table_id: str, project_id: str | None = None) -> dict: - """ - Get the schema for a given dataset and table. + """Get the schema for a given dataset and table. - see https://cloud.google.com/bigquery/docs/reference/v2/tables#resource + .. seealso:: https://cloud.google.com/bigquery/docs/reference/v2/tables#resource :param dataset_id: the dataset ID of the requested table :param table_id: the table ID of the requested table @@ -1355,32 +1346,37 @@ def update_table_schema( table_id: str, project_id: str | None = None, ) -> dict[str, Any]: - """ - Update fields within a schema for a given dataset and table. Note that - some fields in schemas are immutable and trying to change them will cause - an exception. - If a new field is included it will be inserted which requires all required fields to be set. + """Update fields within a schema for a given dataset and table. + + Note that some fields in schemas are immutable; trying to change them + will cause an exception. - See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableSchema + If a new field is included, it will be inserted, which requires all + required fields to be set. + + .. seealso:: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableSchema :param include_policy_tags: If set to True policy tags will be included in the update request which requires special permissions even if unchanged see https://cloud.google.com/bigquery/docs/column-level-security#roles :param dataset_id: the dataset ID of the requested table to be updated :param table_id: the table ID of the table to be updated - :param schema_fields_updates: a partial schema resource. see + :param schema_fields_updates: a partial schema resource. See https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableSchema - **Example**: :: + .. 
code-block:: python - schema_fields_updates=[ - {"name": "emp_name", "description": "Some New Description"}, - {"name": "salary", "description": "Some New Description"}, - {"name": "departments", "fields": [ - {"name": "name", "description": "Some New Description"}, - {"name": "type", "description": "Some New Description"} - ]}, - ] + schema_fields_updates = [ + {"name": "emp_name", "description": "Some New Description"}, + {"name": "salary", "description": "Some New Description"}, + { + "name": "departments", + "fields": [ + {"name": "name", "description": "Some New Description"}, + {"name": "type", "description": "Some New Description"}, + ], + }, + ] :param project_id: The name of the project where we want to update the table. """ @@ -1446,8 +1442,7 @@ def poll_job_complete( location: str | None = None, retry: Retry = DEFAULT_RETRY, ) -> bool: - """ - Check if jobs completed. + """Check if jobs have completed. :param job_id: id of the job. :param project_id: Google Cloud Project where the job is running @@ -1476,8 +1471,7 @@ def cancel_job( project_id: str | None = None, location: str | None = None, ) -> None: - """ - Cancel a job and wait for cancellation to complete. + """Cancel a job and wait for cancellation to complete. :param job_id: id of the job. :param project_id: Google Cloud Project where the job is running @@ -1521,10 +1515,9 @@ def get_job( project_id: str | None = None, location: str | None = None, ) -> CopyJob | QueryJob | LoadJob | ExtractJob | UnknownJob: - """ - Retrieves a BigQuery job. + """Retrieve a BigQuery job. - See: https://cloud.google.com/bigquery/docs/reference/v2/jobs + .. seealso:: https://cloud.google.com/bigquery/docs/reference/v2/jobs :param job_id: The ID of the job. The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or dashes (-). The maximum length is 1,024 @@ -1556,11 +1549,9 @@ def insert_job( retry: Retry = DEFAULT_RETRY, timeout: float | None = None, ) -> BigQueryJob: - """ - Executes a BigQuery job. Waits for the job to complete and returns job id. + """Execute a BigQuery job and wait for it to complete. - See here: - https://cloud.google.com/bigquery/docs/reference/v2/jobs + .. seealso:: https://cloud.google.com/bigquery/docs/reference/v2/jobs :param configuration: The configuration parameter maps directly to BigQuery's configuration field in the job object. See @@ -1569,12 +1560,13 @@ def insert_job( :param job_id: The ID of the job. The ID must contain only letters (a-z, A-Z), numbers (0-9), underscores (_), or dashes (-). The maximum length is 1,024 characters. If not provided then uuid will be generated. - :param project_id: Google Cloud Project where the job is running - :param location: location the job is running - :param nowait: specify whether to insert job without waiting for the result + :param project_id: Google Cloud Project where the job is running. + :param location: Location the job is running. + :param nowait: Whether to insert job without waiting for the result. :param retry: How to retry the RPC. :param timeout: The number of seconds to wait for the underlying HTTP transport before using ``retry``. + :return: The job ID. """ location = location or self.location job_id = job_id or self._custom_job_id(configuration) @@ -1611,14 +1603,11 @@ def insert_job( return job_api_repr def run_with_configuration(self, configuration: dict) -> str: - """ - Executes a BigQuery SQL query. - - See here: https://cloud.google.com/bigquery/docs/reference/v2/jobs + """Execute a BigQuery SQL query. 
- This method is deprecated. Please use `BigQueryHook.insert_job` + .. seealso:: https://cloud.google.com/bigquery/docs/reference/v2/jobs - For more details about the configuration parameter. + This method is deprecated. Please use :func:`.insert_job` instead. :param configuration: The configuration parameter maps directly to BigQuery's configuration field in the job object. See @@ -1658,15 +1647,11 @@ def run_load( labels: dict | None = None, description: str | None = None, ) -> str: - """ - Executes a BigQuery load command to load data from Google Cloud Storage - to BigQuery. + """Load data from Google Cloud Storage to BigQuery. - See here: https://cloud.google.com/bigquery/docs/reference/v2/jobs + .. seealso:: https://cloud.google.com/bigquery/docs/reference/v2/jobs - This method is deprecated. Please use `BigQueryHook.insert_job` method. - - For more details about these parameters. + This method is deprecated. Please use :func:`.insert_job` instead. :param destination_project_dataset_table: The dotted ``(.|:).
($)`` BigQuery @@ -1716,11 +1701,13 @@ def run_load( by one or more columns. BigQuery supports clustering for both partitioned and non-partitioned tables. The order of columns given determines the sort order. :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys). - **Example**: :: + + .. code-block:: python encryption_configuration = { - "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key" + "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key", } + :param labels: A dictionary containing labels for the BiqQuery table. :param description: A string containing the description for the BigQuery table. """ @@ -1885,16 +1872,11 @@ def run_copy( labels: dict | None = None, encryption_configuration: dict | None = None, ) -> str: - """ - Executes a BigQuery copy command to copy data from one BigQuery table - to another. + """Copy data from one BigQuery table to another. + .. seealso:: https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.copy - See here: https://cloud.google.com/bigquery/docs/reference/v2/jobs#configuration.copy - - This method is deprecated. Please use `BigQueryHook.insert_job` method. - - For more details about these parameters. + This method is deprecated. Please use :func:`.insert_job` instead. :param source_project_dataset_tables: One or more dotted ``(project:|project.).
`` @@ -1909,11 +1891,12 @@ def run_copy( :param labels: a dictionary containing labels for the job/query, passed to BigQuery :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys). - **Example**: :: - encryption_configuration = { - "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key" - } + .. code-block:: python + + encryption_configuration = { + "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key", + } """ warnings.warn( "This method is deprecated. Please use `BigQueryHook.insert_job` method.", @@ -1976,15 +1959,11 @@ def run_extract( labels: dict | None = None, return_full_job: bool = False, ) -> str | BigQueryJob: - """ - Executes a BigQuery extract command to copy data from BigQuery to - Google Cloud Storage. - - See here: https://cloud.google.com/bigquery/docs/reference/v2/jobs + """Copy data from BigQuery to Google Cloud Storage. - This method is deprecated. Please use `BigQueryHook.insert_job` method. + .. seealso:: https://cloud.google.com/bigquery/docs/reference/v2/jobs - For more details about these parameters. + This method is deprecated. Please use :func:`.insert_job` instead. :param source_project_dataset_table: The dotted ``.
`` BigQuery table to use as the source data. @@ -2064,13 +2043,13 @@ def run_query( location: str | None = None, encryption_configuration: dict | None = None, ) -> str: - """ - Executes a BigQuery SQL query. Optionally persists results in a BigQuery - table. + """Execute a BigQuery SQL query. + + Optionally persists results in a BigQuery table. - See here: https://cloud.google.com/bigquery/docs/reference/v2/jobs + .. seealso:: https://cloud.google.com/bigquery/docs/reference/v2/jobs - This method is deprecated. Please use `BigQueryHook.insert_job` method. + This method is deprecated. Please use :func:`.insert_job` instead. For more details about these parameters. @@ -2120,11 +2099,12 @@ def run_query( US and EU. See details at https://cloud.google.com/bigquery/docs/locations#specifying_your_location :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys). - **Example**: :: - encryption_configuration = { - "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key" - } + .. code-block:: python + + encryption_configuration = { + "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key", + } """ warnings.warn( "This method is deprecated. Please use `BigQueryHook.insert_job` method.", @@ -2283,7 +2263,6 @@ def generate_job_id(self, job_id, dag_id, task_id, logical_date, configuration, def split_tablename( self, table_input: str, default_project_id: str, var_name: str | None = None ) -> tuple[str, str, str]: - if "." not in table_input: raise ValueError(f"Expected table name in the format of .
. Got: {table_input}") @@ -2344,7 +2323,8 @@ def var_print(var_name): class BigQueryConnection: - """ + """BigQuery connection. + BigQuery does not have a notion of a persistent connection. Thus, these objects are small stateless factories for cursors, which do all the real work. @@ -2370,7 +2350,8 @@ def rollback(self) -> NoReturn: class BigQueryBaseCursor(LoggingMixin): - """ + """BigQuery cursor. + The BigQuery base cursor contains helper methods to execute queries against BigQuery. The methods can be used directly by operators, in cases where a PEP 249 cursor isn't needed. @@ -2401,9 +2382,10 @@ def __init__( self.hook = hook def create_empty_table(self, *args, **kwargs): - """ - This method is deprecated. - Please use `airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.create_empty_table`. + """This method is deprecated. + + Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.create_empty_table` + instead. """ warnings.warn( "This method is deprecated. " @@ -2414,9 +2396,10 @@ def create_empty_table(self, *args, **kwargs): return self.hook.create_empty_table(*args, **kwargs) def create_empty_dataset(self, *args, **kwargs) -> dict[str, Any]: - """ - This method is deprecated. - Please use `airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.create_empty_dataset`. + """This method is deprecated. + + Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.create_empty_dataset` + instead. """ warnings.warn( "This method is deprecated. " @@ -2427,9 +2410,10 @@ def create_empty_dataset(self, *args, **kwargs) -> dict[str, Any]: return self.hook.create_empty_dataset(*args, **kwargs) def get_dataset_tables(self, *args, **kwargs) -> list[dict[str, Any]]: - """ - This method is deprecated. - Please use `airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_dataset_tables`. + """This method is deprecated. + + Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_dataset_tables` + instead. """ warnings.warn( "This method is deprecated. " @@ -2440,9 +2424,10 @@ def get_dataset_tables(self, *args, **kwargs) -> list[dict[str, Any]]: return self.hook.get_dataset_tables(*args, **kwargs) def delete_dataset(self, *args, **kwargs) -> None: - """ - This method is deprecated. - Please use `airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.delete_dataset`. + """This method is deprecated. + + Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.delete_dataset` + instead. """ warnings.warn( "This method is deprecated. " @@ -2453,9 +2438,10 @@ def delete_dataset(self, *args, **kwargs) -> None: return self.hook.delete_dataset(*args, **kwargs) def create_external_table(self, *args, **kwargs): - """ - This method is deprecated. - Please use `airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.create_external_table`. + """This method is deprecated. + + Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.create_external_table` + instead. """ warnings.warn( "This method is deprecated. " @@ -2466,9 +2452,10 @@ def create_external_table(self, *args, **kwargs): return self.hook.create_external_table(*args, **kwargs) def patch_table(self, *args, **kwargs) -> None: - """ - This method is deprecated. - Please use `airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.patch_table`. + """This method is deprecated. + + Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.patch_table` + instead. 
""" warnings.warn( "This method is deprecated. " @@ -2479,9 +2466,10 @@ def patch_table(self, *args, **kwargs) -> None: return self.hook.patch_table(*args, **kwargs) def insert_all(self, *args, **kwargs) -> None: - """ - This method is deprecated. - Please use `airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.insert_all`. + """This method is deprecated. + + Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.insert_all` + instead. """ warnings.warn( "This method is deprecated. " @@ -2492,9 +2480,10 @@ def insert_all(self, *args, **kwargs) -> None: return self.hook.insert_all(*args, **kwargs) def update_dataset(self, *args, **kwargs) -> dict: - """ - This method is deprecated. - Please use `airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.update_dataset`. + """This method is deprecated. + + Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.update_dataset` + instead. """ warnings.warn( "This method is deprecated. " @@ -2505,9 +2494,10 @@ def update_dataset(self, *args, **kwargs) -> dict: return Dataset.to_api_repr(self.hook.update_dataset(*args, **kwargs)) def patch_dataset(self, *args, **kwargs) -> dict: - """ - This method is deprecated. - Please use `airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.patch_dataset`. + """This method is deprecated. + + Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.patch_dataset` + instead. """ warnings.warn( "This method is deprecated. " @@ -2518,9 +2508,10 @@ def patch_dataset(self, *args, **kwargs) -> dict: return self.hook.patch_dataset(*args, **kwargs) def get_dataset_tables_list(self, *args, **kwargs) -> list[dict[str, Any]]: - """ - This method is deprecated. - Please use `airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_dataset_tables_list`. + """This method is deprecated. + + Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_dataset_tables_list` + instead. """ warnings.warn( "This method is deprecated. " @@ -2531,9 +2522,10 @@ def get_dataset_tables_list(self, *args, **kwargs) -> list[dict[str, Any]]: return self.hook.get_dataset_tables_list(*args, **kwargs) def get_datasets_list(self, *args, **kwargs) -> list | HTTPIterator: - """ - This method is deprecated. - Please use `airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_datasets_list`. + """This method is deprecated. + + Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_datasets_list` + instead. """ warnings.warn( "This method is deprecated. " @@ -2544,9 +2536,10 @@ def get_datasets_list(self, *args, **kwargs) -> list | HTTPIterator: return self.hook.get_datasets_list(*args, **kwargs) def get_dataset(self, *args, **kwargs) -> Dataset: - """ - This method is deprecated. - Please use `airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_dataset`. + """This method is deprecated. + + Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_dataset` + instead. """ warnings.warn( "This method is deprecated. " @@ -2557,9 +2550,11 @@ def get_dataset(self, *args, **kwargs) -> Dataset: return self.hook.get_dataset(*args, **kwargs) def run_grant_dataset_view_access(self, *args, **kwargs) -> dict: - """ - This method is deprecated. - Please use `airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_grant_dataset_view_access`. + """This method is deprecated. 
+ + Please use + :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_grant_dataset_view_access` + instead. """ warnings.warn( "This method is deprecated. " @@ -2571,9 +2566,10 @@ def run_grant_dataset_view_access(self, *args, **kwargs) -> dict: return self.hook.run_grant_dataset_view_access(*args, **kwargs) def run_table_upsert(self, *args, **kwargs) -> dict: - """ - This method is deprecated. - Please use `airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_table_upsert`. + """This method is deprecated. + + Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_table_upsert` + instead. """ warnings.warn( "This method is deprecated. " @@ -2584,9 +2580,10 @@ def run_table_upsert(self, *args, **kwargs) -> dict: return self.hook.run_table_upsert(*args, **kwargs) def run_table_delete(self, *args, **kwargs) -> None: - """ - This method is deprecated. - Please use `airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_table_delete`. + """This method is deprecated. + + Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_table_delete` + instead. """ warnings.warn( "This method is deprecated. " @@ -2597,9 +2594,10 @@ def run_table_delete(self, *args, **kwargs) -> None: return self.hook.run_table_delete(*args, **kwargs) def get_tabledata(self, *args, **kwargs) -> list[dict]: - """ - This method is deprecated. - Please use `airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_tabledata`. + """This method is deprecated. + + Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_tabledata` + instead. """ warnings.warn( "This method is deprecated. " @@ -2610,9 +2608,10 @@ def get_tabledata(self, *args, **kwargs) -> list[dict]: return self.hook.get_tabledata(*args, **kwargs) def get_schema(self, *args, **kwargs) -> dict: - """ - This method is deprecated. - Please use `airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_schema`. + """This method is deprecated. + + Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.get_schema` + instead. """ warnings.warn( "This method is deprecated. " @@ -2623,9 +2622,10 @@ def get_schema(self, *args, **kwargs) -> dict: return self.hook.get_schema(*args, **kwargs) def poll_job_complete(self, *args, **kwargs) -> bool: - """ - This method is deprecated. - Please use `airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.poll_job_complete`. + """This method is deprecated. + + Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.poll_job_complete` + instead. """ warnings.warn( "This method is deprecated. " @@ -2636,9 +2636,10 @@ def poll_job_complete(self, *args, **kwargs) -> bool: return self.hook.poll_job_complete(*args, **kwargs) def cancel_query(self, *args, **kwargs) -> None: - """ - This method is deprecated. - Please use `airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.cancel_query`. + """This method is deprecated. + + Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.cancel_query` + instead. """ warnings.warn( "This method is deprecated. " @@ -2649,9 +2650,10 @@ def cancel_query(self, *args, **kwargs) -> None: return self.hook.cancel_query(*args, **kwargs) # type: ignore def run_with_configuration(self, *args, **kwargs) -> str: - """ - This method is deprecated. - Please use `airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_with_configuration`. + """This method is deprecated. 
+ + Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_with_configuration` + instead. """ warnings.warn( "This method is deprecated. " @@ -2662,9 +2664,10 @@ def run_with_configuration(self, *args, **kwargs) -> str: return self.hook.run_with_configuration(*args, **kwargs) def run_load(self, *args, **kwargs) -> str: - """ - This method is deprecated. - Please use `airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_load`. + """This method is deprecated. + + Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_load` + instead. """ warnings.warn( "This method is deprecated. " @@ -2675,9 +2678,10 @@ def run_load(self, *args, **kwargs) -> str: return self.hook.run_load(*args, **kwargs) def run_copy(self, *args, **kwargs) -> str: - """ - This method is deprecated. - Please use `airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_copy`. + """This method is deprecated. + + Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_copy` + instead. """ warnings.warn( "This method is deprecated. " @@ -2688,9 +2692,10 @@ def run_copy(self, *args, **kwargs) -> str: return self.hook.run_copy(*args, **kwargs) def run_extract(self, *args, **kwargs) -> str | BigQueryJob: - """ - This method is deprecated. - Please use `airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_extract`. + """This method is deprecated. + + Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_extract` + instead. """ warnings.warn( "This method is deprecated. " @@ -2701,9 +2706,10 @@ def run_extract(self, *args, **kwargs) -> str | BigQueryJob: return self.hook.run_extract(*args, **kwargs) def run_query(self, *args, **kwargs) -> str: - """ - This method is deprecated. - Please use `airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_query`. + """This method is deprecated. + + Please use :func:`~airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.run_query` + instead. """ warnings.warn( "This method is deprecated. " @@ -2715,9 +2721,9 @@ def run_query(self, *args, **kwargs) -> str: class BigQueryCursor(BigQueryBaseCursor): - """ - A very basic BigQuery PEP 249 cursor implementation. The PyHive PEP 249 - implementation was used as a reference. + """A very basic BigQuery PEP 249 cursor implementation. + + The PyHive PEP 249 implementation was used as a reference: https://github.com/dropbox/PyHive/blob/master/pyhive/presto.py https://github.com/dropbox/PyHive/blob/master/pyhive/common.py @@ -2765,8 +2771,7 @@ def rowcount(self) -> int: return -1 def execute(self, operation: str, parameters: dict | None = None) -> None: - """ - Executes a BigQuery query, and returns the job ID. + """Execute a BigQuery query, and return the job ID. :param operation: The query to execute. :param parameters: Parameters to substitute into the query. @@ -2782,8 +2787,7 @@ def execute(self, operation: str, parameters: dict | None = None) -> None: self.description = [] def executemany(self, operation: str, seq_of_parameters: list) -> None: - """ - Execute a BigQuery query multiple times with different parameters. + """Execute a BigQuery query multiple times with different parameters. :param operation: The query to execute. :param seq_of_parameters: List of dictionary parameters to substitute into the @@ -2804,8 +2808,10 @@ def fetchone(self) -> list | None: return self.next() def next(self) -> list | None: - """ - Helper method for fetchone, which returns the next row from a buffer. 
+ """Return the next row from a buffer. + + Helper method for ``fetchone``. + If the buffer is empty, attempts to paginate through the result set for the next page, and load it into the buffer. """ @@ -2838,16 +2844,22 @@ def next(self) -> list | None: return self.buffer.pop(0) def fetchmany(self, size: int | None = None) -> list: - """ - Fetch the next set of rows of a query result, returning a sequence of sequences - (e.g. a list of tuples). An empty sequence is returned when no more rows are - available. The number of rows to fetch per call is specified by the parameter. - If it is not given, the cursor's arraysize determines the number of rows to be - fetched. The method should try to fetch as many rows as indicated by the size - parameter. If this is not possible due to the specified number of rows not being - available, fewer rows may be returned. An :py:class:`~pyhive.exc.Error` - (or subclass) exception is raised if the previous call to - :py:meth:`execute` did not produce any result set or no call was issued yet. + """Fetch the next set of rows of a query result. + + This returns a sequence of sequences (e.g. a list of tuples). An empty + sequence is returned when no more rows are available. + + The number of rows to fetch per call is specified by the parameter. If + it is not given, the cursor's arraysize determines the number of rows to + be fetched. + + This method tries to fetch as many rows as indicated by the size + parameter. If this is not possible due to the specified number of rows + not being available, fewer rows may be returned. + + An :py:class:`~pyhive.exc.Error` (or subclass) exception is raised if + the previous call to :py:meth:`execute` did not produce any result set, + or no call was issued yet. """ if size is None: size = self.arraysize @@ -2860,9 +2872,9 @@ def fetchmany(self, size: int | None = None) -> list: return result def fetchall(self) -> list[list]: - """ - Fetch all (remaining) rows of a query result, returning them as a sequence of - sequences (e.g. a list of tuples). + """Fetch all (remaining) rows of a query result. + + A sequence of sequences (e.g. a list of tuples) is returned. """ result = [] while True: @@ -2873,11 +2885,17 @@ def fetchall(self) -> list[list]: return result def get_arraysize(self) -> int: - """Specifies the number of rows to fetch at a time with .fetchmany().""" + """Number of rows to fetch at a time. + + .. seealso:: :func:`.fetchmany()` + """ return self.buffersize or 1 def set_arraysize(self, arraysize: int) -> None: - """Specifies the number of rows to fetch at a time with .fetchmany().""" + """Set the number of rows to fetch at a time. + + .. 
seealso:: :func:`.fetchmany()` + """ self.buffersize = arraysize arraysize = property(get_arraysize, set_arraysize) @@ -2889,7 +2907,7 @@ def setoutputsize(self, size: Any, column: Any = None) -> None: """Does nothing by default.""" def _get_query_result(self) -> dict: - """Get job query results like data, schema, job type...""" + """Get job query results; data, schema, job type, etc.""" query_results = ( self.service.jobs() .getQueryResults( @@ -2993,10 +3011,8 @@ def var_print(var_name): def _cleanse_time_partitioning( destination_dataset_table: str | None, time_partitioning_in: dict | None ) -> dict: # if it is a partitioned table ($ is in the table name) add partition load option - if time_partitioning_in is None: time_partitioning_in = {} - time_partitioning_out = {} if destination_dataset_table and "$" in destination_dataset_table: time_partitioning_out["type"] = "DAY" @@ -3005,7 +3021,7 @@ def _cleanse_time_partitioning( def _validate_value(key: Any, value: Any, expected_type: type | tuple[type]) -> None: - """Function to check expected type and raise error if type is not correct.""" + """Check expected type and raise error if type is not correct.""" if not isinstance(value, expected_type): raise TypeError(f"{key} argument must have a type {expected_type} not {type(value)}") @@ -3030,9 +3046,9 @@ def _validate_src_fmt_configs( valid_configs: list[str], backward_compatibility_configs: dict | None = None, ) -> dict: - """ - Validates the given src_fmt_configs against a valid configuration for the source format. - Adds the backward compatibility config to the src_fmt_configs. + """Validate ``src_fmt_configs`` against a valid config for the source format. + + Adds the backward compatibility config to ``src_fmt_configs``. :param source_format: File format to export. :param src_fmt_configs: Configure optional fields specific to the source format. @@ -3054,9 +3070,10 @@ def _validate_src_fmt_configs( def _format_schema_for_description(schema: dict) -> list: - """ - Reformat the schema to match cursor description standard which is a tuple - of 7 elemenbts (name, type, display_size, internal_size, precision, scale, null_ok). + """Reformat the schema to match cursor description standard. + + The description should be a tuple of 7 elemenbts: name, type, display_size, + internal_size, precision, scale, null_ok. """ description = [] for field in schema["fields"]: @@ -3091,8 +3108,7 @@ async def get_job_status( job_id: str | None, project_id: str | None = None, ) -> str | None: - """ - Polls for job status asynchronously using gcloud-aio. + """Poll for job status asynchronously using gcloud-aio. Note that an OSError is raised when Job results are still pending. Exception means that Job finished with errors @@ -3116,7 +3132,7 @@ async def get_job_output( job_id: str | None, project_id: str | None = None, ) -> dict[str, Any]: - """Get the big query job output for the given job id asynchronously using gcloud-aio.""" + """Get the BigQuery job output for a given job ID asynchronously.""" async with ClientSession() as session: self.log.info("Executing get_job_output..") job_client = await self.get_job_instance(project_id, job_id, session) @@ -3142,8 +3158,7 @@ async def create_job_for_partition_get( return job_query_resp["jobReference"]["jobId"] def get_records(self, query_results: dict[str, Any], as_dict: bool = False) -> list[Any]: - """ - Given the output query response from gcloud-aio bigquery, convert the response to records. + """Convert a response from BigQuery to records. 
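
Stepping back to the PEP 249 cursor interface covered above, a minimal usage sketch might look like the following (the connection ID, project, dataset, and table names are placeholders, not part of this change):

.. code-block:: python

    from airflow.providers.google.cloud.hooks.bigquery import BigQueryHook

    hook = BigQueryHook(gcp_conn_id="google_cloud_default", use_legacy_sql=False)
    cursor = hook.get_conn().cursor()
    cursor.arraysize = 500  # rows returned per fetchmany() call when no size is given
    cursor.execute("SELECT name, value FROM `example-project.example_dataset.example_table`")
    first_page = cursor.fetchmany()  # at most 500 rows
    remaining = cursor.fetchall()
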
        :param query_results: the results from a SQL query
        :param as_dict: if True returns the result as a list of dictionaries, otherwise as
            list of lists.
@@ -3170,10 +3185,9 @@ def value_check(
        records: list[Any],
        tolerance: float | None = None,
    ) -> None:
-        """
-        Match a single query resulting row and tolerance with pass_value.
+        """Match a single query resulting row and tolerance with pass_value.

-        :return: If Match fail, we throw an AirflowException.
+        :raise AirflowException: if matching fails
        """
        if not records:
            raise AirflowException("The query returned None")
@@ -3208,8 +3222,7 @@ def value_check(
    def _get_numeric_matches(
        records: list[float], pass_value: Any, tolerance: float | None = None
    ) -> list[bool]:
-        """
-        A helper function to match numeric pass_value, tolerance with records value.
+        """Match numeric pass_value, tolerance with records value.

        :param records: List of value to match against
        :param pass_value: Expected value
@@ -3224,8 +3237,7 @@ def _get_numeric_matches(

    @staticmethod
    def _convert_to_float_if_possible(s: Any) -> Any:
-        """
-        A small helper function to convert a string to a numeric value if appropriate.
+        """Convert a string to a numeric value if appropriate.

        :param s: the string to be converted
        """
@@ -3242,8 +3254,7 @@ def interval_check(
        ignore_zero: bool,
        ratio_formula: str,
    ) -> None:
-        """
-        Checks that the values of metrics given as SQL expressions are within a certain tolerance.
+        """Check that values of metrics (SQL expressions) are within a certain tolerance.

        :param row1: first resulting row of a query execution job for first SQL query
        :param row2: first resulting row of a query execution job for second SQL query
@@ -3323,15 +3334,14 @@ def interval_check(


class BigQueryTableAsyncHook(GoogleBaseAsyncHook):
-    """Class to get async hook for Bigquery Table Async."""
+    """Async hook for BigQuery Table."""

    sync_hook_class = BigQueryHook

    async def get_table_client(
        self, dataset: str, table_id: str, project_id: str, session: ClientSession
    ) -> Table_async:
-        """
-        Returns a Google Big Query Table object.
+        """Get a Google Big Query Table object.

        :param dataset: The name of the dataset in which to look for the table storage bucket.
        :param table_id: The name of the table to check the existence of.
diff --git a/airflow/providers/google/cloud/hooks/cloud_sql.py b/airflow/providers/google/cloud/hooks/cloud_sql.py
index 1a325775f228..42abfc1c2463 100644
--- a/airflow/providers/google/cloud/hooks/cloud_sql.py
+++ b/airflow/providers/google/cloud/hooks/cloud_sql.py
@@ -840,12 +840,13 @@ def validate_socket_path_length(self) -> None:

    @staticmethod
    def _generate_unique_path() -> str:
-        """
-        We are not using mkdtemp here as the path generated with mkdtemp
-        can be close to 60 characters and there is a limitation in
-        length of socket path to around 100 characters in total.
-        We append project/location/instance to it later and postgres
-        appends its own prefix, so we chose a shorter "${tempdir()}[8 random characters]".
+        """Generate a unique path.
+
+        We don't use mkdtemp here because it can generate paths close to 60
+        characters. We append project/location/instance to the path, and
+        Postgres then appends its own prefix, which would push the result past
+        the roughly 100-character length limit of a socket path. Instead, this
+        generates a shorter path: ``${tempdir()}[8 random characters]``.
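
As a rough illustration of the shorter-path scheme described above (a sketch only; the helper name and the lowercase alphabet are assumptions, not the provider's exact implementation):

.. code-block:: python

    import os
    import random
    import string
    import tempfile


    def generate_short_socket_dir() -> str:
        # Eight random characters keep the socket path well under the
        # ~100-character limit, even after the project/location/instance
        # suffix and the Postgres-specific prefix are appended.
        suffix = "".join(random.choices(string.ascii_lowercase, k=8))
        return os.path.join(tempfile.gettempdir(), suffix)
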
""" random.seed() while True: @@ -926,9 +927,10 @@ def _get_sqlproxy_instance_specification(self) -> str: return instance_specification def create_connection(self) -> Connection: - """ - Create Connection object, according to whether it uses proxy, TCP, UNIX sockets, SSL. - Connection ID will be randomly generated. + """Create a connection. + + Connection ID will be randomly generated according to whether it uses + proxy, TCP, UNIX sockets, SSL. """ uri = self._generate_connection_uri() connection = Connection(conn_id=self.db_conn_id, uri=uri) @@ -936,9 +938,9 @@ def create_connection(self) -> Connection: return connection def get_sqlproxy_runner(self) -> CloudSqlProxyRunner: - """ - Retrieve Cloud SQL Proxy runner. It is used to manage the proxy - lifecycle per task. + """Retrieve Cloud SQL Proxy runner. + + It is used to manage the proxy lifecycle per task. :return: The Cloud SQL Proxy runner. """ @@ -956,9 +958,10 @@ def get_sqlproxy_runner(self) -> CloudSqlProxyRunner: ) def get_database_hook(self, connection: Connection) -> PostgresHook | MySqlHook: - """ - Retrieve database hook. This is the actual Postgres or MySQL database hook - that uses proxy or connects directly to the Google Cloud SQL database. + """Retrieve database hook. + + This is the actual Postgres or MySQL database hook that uses proxy or + connects directly to the Google Cloud SQL database. """ if self.database_type == "postgres": db_hook: PostgresHook | MySqlHook = PostgresHook(connection=connection, schema=self.database) @@ -986,7 +989,10 @@ def reserve_free_tcp_port(self) -> None: self.sql_proxy_tcp_port = self.reserved_tcp_socket.getsockname()[1] def free_reserved_port(self) -> None: - """Free TCP port. Makes it immediately ready to be used by Cloud SQL Proxy.""" + """Free TCP port. + + Makes it immediately ready to be used by Cloud SQL Proxy. + """ if self.reserved_tcp_socket: self.reserved_tcp_socket.close() self.reserved_tcp_socket = None diff --git a/airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py b/airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py index 2823bee1f17c..888a5650d496 100644 --- a/airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py +++ b/airflow/providers/google/cloud/hooks/cloud_storage_transfer_service.py @@ -39,7 +39,7 @@ class GcpTransferJobsStatus: - """Class with Google Cloud Transfer jobs statuses.""" + """Google Cloud Transfer job status.""" ENABLED = "ENABLED" DISABLED = "DISABLED" @@ -47,7 +47,7 @@ class GcpTransferJobsStatus: class GcpTransferOperationStatus: - """Class with Google Cloud Transfer operations statuses.""" + """Google Cloud Transfer operation status.""" IN_PROGRESS = "IN_PROGRESS" PAUSED = "PAUSED" @@ -104,9 +104,7 @@ class GcpTransferOperationStatus: def gen_job_name(job_name: str) -> str: - """ - Adds unique suffix to job name. If suffix already exists, updates it. - Suffix — current timestamp. + """Add a unique suffix to the job name. :param job_name: :return: job_name with suffix @@ -116,11 +114,10 @@ def gen_job_name(job_name: str) -> str: class CloudDataTransferServiceHook(GoogleBaseHook): - """ - Hook for Google Storage Transfer Service. + """Google Storage Transfer Service functionalities. - All the methods in the hook where project_id is used must be called with - keyword arguments rather than positional. + All methods in the hook with *project_id* in the signature must be called + with keyword arguments rather than positional. 
""" def __init__( @@ -130,10 +127,11 @@ def __init__( impersonation_chain: str | Sequence[str] | None = None, **kwargs, ) -> None: - if kwargs.get("delegate_to") is not None: + if "delegate_to" in kwargs: raise RuntimeError( - "The `delegate_to` parameter has been deprecated before and finally removed in this version" - " of Google Provider. You MUST convert it to `impersonate_chain`" + "The `delegate_to` parameter has been deprecated before and " + "finally removed in this version of Google Provider. You MUST " + "convert it to `impersonate_chain`." ) super().__init__( gcp_conn_id=gcp_conn_id, @@ -143,8 +141,7 @@ def __init__( self._conn = None def get_conn(self) -> Resource: - """ - Retrieves connection to Google Storage Transfer service. + """Retrieve connection to Google Storage Transfer service. :return: Google Storage Transfer service object """ @@ -156,13 +153,11 @@ def get_conn(self) -> Resource: return self._conn def create_transfer_job(self, body: dict) -> dict: - """ - Creates a transfer job that runs periodically. + """Create a transfer job that runs periodically. - :param body: (Required) A request body, as described in + :param body: (Required) The request body, as described in https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferJobs/patch#request-body - :return: transfer job. - See: + :return: The transfer job. See: https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferJobs#TransferJob """ body = self._inject_project_id(body, BODY, PROJECT_ID) @@ -200,9 +195,7 @@ def create_transfer_job(self, body: dict) -> dict: @GoogleBaseHook.fallback_to_default_project_id def get_transfer_job(self, job_name: str, project_id: str) -> dict: - """ - Gets the latest state of a long-running operation in Google Storage - Transfer Service. + """Get latest state of a long-running Google Storage Transfer Service job. :param job_name: (Required) Name of the job to be fetched :param project_id: (Optional) the ID of the project that owns the Transfer @@ -218,9 +211,9 @@ def get_transfer_job(self, job_name: str, project_id: str) -> dict: ) def list_transfer_job(self, request_filter: dict | None = None, **kwargs) -> list[dict]: - """ - Lists long-running operations in Google Storage Transfer - Service that match the specified filter. + """List long-running operations in Google Storage Transfer Service. + + A filter can be specified to match only certain entries. :param request_filter: (Required) A request filter, as described in https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferJobs/list#body.QUERY_PARAMETERS.filter @@ -252,8 +245,7 @@ def list_transfer_job(self, request_filter: dict | None = None, **kwargs) -> lis @GoogleBaseHook.fallback_to_default_project_id def enable_transfer_job(self, job_name: str, project_id: str) -> dict: - """ - New transfers will be performed based on the schedule. + """Make new transfers be performed based on the schedule. :param job_name: (Required) Name of the job to be updated :param project_id: (Optional) the ID of the project that owns the Transfer @@ -276,8 +268,7 @@ def enable_transfer_job(self, job_name: str, project_id: str) -> dict: ) def update_transfer_job(self, job_name: str, body: dict) -> dict: - """ - Updates a transfer job that runs periodically. + """Update a transfer job that runs periodically. 
:param job_name: (Required) Name of the job to be updated :param body: A request body, as described in @@ -294,11 +285,11 @@ def update_transfer_job(self, job_name: str, body: dict) -> dict: @GoogleBaseHook.fallback_to_default_project_id def delete_transfer_job(self, job_name: str, project_id: str) -> None: - """ - Deletes a transfer job. This is a soft delete. After a transfer job is - deleted, the job and all the transfer executions are subject to garbage - collection. Transfer jobs become eligible for garbage collection - 30 days after soft delete. + """Delete a transfer job. + + This is a soft delete. After a transfer job is deleted, the job and all + the transfer executions are subject to garbage collection. Transfer jobs + become eligible for garbage collection 30 days after soft delete. :param job_name: (Required) Name of the job to be deleted :param project_id: (Optional) the ID of the project that owns the Transfer @@ -320,21 +311,19 @@ def delete_transfer_job(self, job_name: str, project_id: str) -> None: ) def cancel_transfer_operation(self, operation_name: str) -> None: - """ - Cancels an transfer operation in Google Storage Transfer Service. + """Cancel a transfer operation in Google Storage Transfer Service. :param operation_name: Name of the transfer operation. """ self.get_conn().transferOperations().cancel(name=operation_name).execute(num_retries=self.num_retries) def get_transfer_operation(self, operation_name: str) -> dict: - """ - Gets an transfer operation in Google Storage Transfer Service. + """Get a transfer operation in Google Storage Transfer Service. :param operation_name: (Required) Name of the transfer operation. :return: transfer operation - See: - https://cloud.google.com/storage-transfer/docs/reference/rest/v1/Operation + + .. seealso:: https://cloud.google.com/storage-transfer/docs/reference/rest/v1/Operation """ return ( self.get_conn() @@ -344,18 +333,15 @@ def get_transfer_operation(self, operation_name: str) -> dict: ) def list_transfer_operations(self, request_filter: dict | None = None, **kwargs) -> list[dict]: - """ - Gets an transfer operation in Google Storage Transfer Service. + """Get a transfer operation in Google Storage Transfer Service. :param request_filter: (Required) A request filter, as described in https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferJobs/list#body.QUERY_PARAMETERS.filter With one additional improvement: - - * project_id is optional if you have a project id defined - in the connection - See: :doc:`/connections/gcp` - :return: transfer operation + + The ``project_id`` parameter is optional if you have a project ID + defined in the connection. See: :doc:`/connections/gcp` """ # To preserve backward compatibility # TODO: remove one day @@ -390,16 +376,14 @@ def list_transfer_operations(self, request_filter: dict | None = None, **kwargs) return operations def pause_transfer_operation(self, operation_name: str) -> None: - """ - Pauses an transfer operation in Google Storage Transfer Service. + """Pause a transfer operation in Google Storage Transfer Service. :param operation_name: (Required) Name of the transfer operation. """ self.get_conn().transferOperations().pause(name=operation_name).execute(num_retries=self.num_retries) def resume_transfer_operation(self, operation_name: str) -> None: - """ - Resumes an transfer operation in Google Storage Transfer Service. + """Resume a transfer operation in Google Storage Transfer Service. :param operation_name: (Required) Name of the transfer operation. 
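
Reusing the ``hook`` from the earlier sketch, the operation-level calls above can be combined like this (the operation name is a placeholder):

.. code-block:: python

    operation_name = "transferOperations/example-operation"

    operation = hook.get_transfer_operation(operation_name=operation_name)
    hook.pause_transfer_operation(operation_name=operation_name)
    # ... later, once it is safe to continue:
    hook.resume_transfer_operation(operation_name=operation_name)
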
""" @@ -411,17 +395,14 @@ def wait_for_transfer_job( expected_statuses: set[str] | None = None, timeout: float | timedelta | None = None, ) -> None: - """ - Waits until the job reaches the expected state. + """Wait until the job reaches the expected state. - :param job: Transfer job - See: + :param job: The transfer job to wait for. See: https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferJobs#TransferJob - :param expected_statuses: State that is expected - See: + :param expected_statuses: The expected state. See: https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferOperations#Status - :param timeout: Time in which the operation must end in seconds. If not specified, defaults to 60 - seconds. + :param timeout: Time in which the operation must end in seconds. If not + specified, defaults to 60 seconds. """ expected_statuses = ( {GcpTransferOperationStatus.SUCCESS} if not expected_statuses else expected_statuses @@ -458,20 +439,15 @@ def _inject_project_id(self, body: dict, param_name: str, target_key: str) -> di def operations_contain_expected_statuses( operations: list[dict], expected_statuses: set[str] | str ) -> bool: - """ - Checks whether the operation list has an operation with the - expected status, then returns true - If it encounters operations in FAILED or ABORTED state - throw :class:`airflow.exceptions.AirflowException`. + """Check whether an operation exists with the expected status. :param operations: (Required) List of transfer operations to check. - :param expected_statuses: (Required) status that is expected - See: + :param expected_statuses: (Required) The expected status. See: https://cloud.google.com/storage-transfer/docs/reference/rest/v1/transferOperations#Status - :return: If there is an operation with the expected state - in the operation list, returns true, - :raises: airflow.exceptions.AirflowException If it encounters operations - with a state in the list, + :return: If there is an operation with the expected state in the + operation list, returns true, + :raises AirflowException: If it encounters operations with state FAILED + or ABORTED in the list. """ expected_statuses_set = ( {expected_statuses} if isinstance(expected_statuses, str) else set(expected_statuses) diff --git a/airflow/providers/google/cloud/hooks/dataflow.py b/airflow/providers/google/cloud/hooks/dataflow.py index e393c6dcbb91..3e0b07fcb28d 100644 --- a/airflow/providers/google/cloud/hooks/dataflow.py +++ b/airflow/providers/google/cloud/hooks/dataflow.py @@ -57,29 +57,25 @@ def process_line_and_extract_dataflow_job_id_callback( on_new_job_id_callback: Callable[[str], None] | None ) -> Callable[[str], None]: - """ - Returns callback which triggers function passed as `on_new_job_id_callback` when Dataflow job_id is found. - To be used for `process_line_callback` in + """Build callback that triggers the specified function. + + The returned callback is intended to be used as ``process_line_callback`` in :py:class:`~airflow.providers.apache.beam.hooks.beam.BeamCommandRunner`. :param on_new_job_id_callback: Callback called when the job ID is known """ - def _process_line_and_extract_job_id( - line: str, - # on_new_job_id_callback: Callable[[str], None] | None - ) -> None: + def _process_line_and_extract_job_id(line: str) -> None: # Job id info: https://goo.gl/SE29y9. 
+ if on_new_job_id_callback is None: + return matched_job = JOB_ID_PATTERN.search(line) - if matched_job: - job_id = matched_job.group("job_id_java") or matched_job.group("job_id_python") - if on_new_job_id_callback: - on_new_job_id_callback(job_id) - - def wrap(line: str): - return _process_line_and_extract_job_id(line) + if matched_job is None: + return + job_id = matched_job.group("job_id_java") or matched_job.group("job_id_python") + on_new_job_id_callback(job_id) - return wrap + return _process_line_and_extract_job_id def _fallback_variable_parameter(parameter_name: str, variable_key_name: str) -> Callable[[T], T]: diff --git a/airflow/providers/google/cloud/hooks/dataform.py b/airflow/providers/google/cloud/hooks/dataform.py index 67797a7ce32b..d7f86157f581 100644 --- a/airflow/providers/google/cloud/hooks/dataform.py +++ b/airflow/providers/google/cloud/hooks/dataform.py @@ -604,9 +604,9 @@ def install_npm_packages( timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), ) -> InstallNpmPackagesResponse: - """ - Installs npm dependencies in the provided workspace. Requires "package.json" - to be created in workspace. + """Install NPM dependencies in the provided workspace. + + Requires "package.json" to be created in the workspace. :param project_id: Required. The ID of the Google Cloud project where workspace located. :param region: Required. The ID of the Google Cloud region where workspace located. diff --git a/airflow/providers/google/cloud/hooks/dataproc.py b/airflow/providers/google/cloud/hooks/dataproc.py index 39670be95d56..cc59dc8d7274 100644 --- a/airflow/providers/google/cloud/hooks/dataproc.py +++ b/airflow/providers/google/cloud/hooks/dataproc.py @@ -78,8 +78,7 @@ def __init__( self.job["job"][job_type]["properties"] = properties def add_labels(self, labels: dict | None = None) -> None: - """ - Set labels for Dataproc job. + """Set labels for Dataproc job. :param labels: Labels for the job query. """ @@ -87,8 +86,7 @@ def add_labels(self, labels: dict | None = None) -> None: self.job["job"]["labels"].update(labels) def add_variables(self, variables: dict | None = None) -> None: - """ - Set variables for Dataproc job. + """Set variables for Dataproc job. :param variables: Variables for the job query. """ @@ -96,8 +94,7 @@ def add_variables(self, variables: dict | None = None) -> None: self.job["job"][self.job_type]["script_variables"] = variables def add_args(self, args: list[str] | None = None) -> None: - """ - Set args for Dataproc job. + """Set args for Dataproc job. :param args: Args for the job query. """ @@ -105,24 +102,21 @@ def add_args(self, args: list[str] | None = None) -> None: self.job["job"][self.job_type]["args"] = args def add_query(self, query: str) -> None: - """ - Set query for Dataproc job. + """Set query for Dataproc job. :param query: query for the job. """ self.job["job"][self.job_type]["query_list"] = {"queries": [query]} def add_query_uri(self, query_uri: str) -> None: - """ - Set query uri for Dataproc job. + """Set query uri for Dataproc job. :param query_uri: URI for the job query. """ self.job["job"][self.job_type]["query_file_uri"] = query_uri def add_jar_file_uris(self, jars: list[str] | None = None) -> None: - """ - Set jars uris for Dataproc job. + """Set jars uris for Dataproc job. 
:param jars: List of jars URIs """ @@ -130,8 +124,7 @@ def add_jar_file_uris(self, jars: list[str] | None = None) -> None: self.job["job"][self.job_type]["jar_file_uris"] = jars def add_archive_uris(self, archives: list[str] | None = None) -> None: - """ - Set archives uris for Dataproc job. + """Set archives uris for Dataproc job. :param archives: List of archives URIs """ @@ -139,8 +132,7 @@ def add_archive_uris(self, archives: list[str] | None = None) -> None: self.job["job"][self.job_type]["archive_uris"] = archives def add_file_uris(self, files: list[str] | None = None) -> None: - """ - Set file uris for Dataproc job. + """Set file uris for Dataproc job. :param files: List of files URIs """ @@ -148,8 +140,7 @@ def add_file_uris(self, files: list[str] | None = None) -> None: self.job["job"][self.job_type]["file_uris"] = files def add_python_file_uris(self, pyfiles: list[str] | None = None) -> None: - """ - Set python file uris for Dataproc job. + """Set python file uris for Dataproc job. :param pyfiles: List of python files URIs """ @@ -157,8 +148,7 @@ def add_python_file_uris(self, pyfiles: list[str] | None = None) -> None: self.job["job"][self.job_type]["python_file_uris"] = pyfiles def set_main(self, main_jar: str | None = None, main_class: str | None = None) -> None: - """ - Set Dataproc main class. + """Set Dataproc main class. :param main_jar: URI for the main file. :param main_class: Name of the main class. @@ -172,16 +162,16 @@ def set_main(self, main_jar: str | None = None, main_class: str | None = None) - self.job["job"][self.job_type]["main_class"] = main_class def set_python_main(self, main: str) -> None: - """ - Set Dataproc main python file uri. + """Set Dataproc main python file uri. :param main: URI for the python main file. """ self.job["job"][self.job_type]["main_python_file_uri"] = main def set_job_name(self, name: str) -> None: - """ - Set Dataproc job name. Job name is sanitized, replacing dots by underscores. + """Set Dataproc job name. + + Job name is sanitized, replacing dots by underscores. :param name: Job name. """ @@ -189,8 +179,7 @@ def set_job_name(self, name: str) -> None: self.job["job"]["reference"]["job_id"] = sanitized_name def build(self) -> dict: - """ - Returns Dataproc job. + """Return Dataproc job. :return: Dataproc job """ @@ -198,8 +187,7 @@ def build(self) -> dict: class DataprocHook(GoogleBaseHook): - """ - Hook for Google Cloud Dataproc APIs. + """Google Cloud Dataproc APIs. All the methods in the hook where project_id is used must be called with keyword arguments rather than positional. 
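
To show how the builder methods above compose, here is a small sketch (the constructor arguments are assumptions based on the builder's usual parameters; all values are placeholders):

.. code-block:: python

    from airflow.providers.google.cloud.hooks.dataproc import DataprocJobBuilder

    builder = DataprocJobBuilder(
        project_id="example-project",
        task_id="example_task",
        cluster_name="example-cluster",
        job_type="hive_job",
        properties={"hive.exec.dynamic.partition": "true"},
    )
    builder.add_labels({"team": "analytics"})
    builder.add_query("SHOW DATABASES;")
    builder.set_job_name("example_job.daily")  # dots are sanitized to underscores
    job = builder.build()
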
@@ -219,7 +207,7 @@ def __init__( super().__init__(gcp_conn_id=gcp_conn_id, impersonation_chain=impersonation_chain) def get_cluster_client(self, region: str | None = None) -> ClusterControllerClient: - """Returns ClusterControllerClient.""" + """Create a ClusterControllerClient.""" client_options = None if region and region != "global": client_options = ClientOptions(api_endpoint=f"{region}-dataproc.googleapis.com:443") @@ -229,7 +217,7 @@ def get_cluster_client(self, region: str | None = None) -> ClusterControllerClie ) def get_template_client(self, region: str | None = None) -> WorkflowTemplateServiceClient: - """Returns WorkflowTemplateServiceClient.""" + """Create a WorkflowTemplateServiceClient.""" client_options = None if region and region != "global": client_options = ClientOptions(api_endpoint=f"{region}-dataproc.googleapis.com:443") @@ -239,7 +227,7 @@ def get_template_client(self, region: str | None = None) -> WorkflowTemplateServ ) def get_job_client(self, region: str | None = None) -> JobControllerClient: - """Returns JobControllerClient.""" + """Create a JobControllerClient.""" client_options = None if region and region != "global": client_options = ClientOptions(api_endpoint=f"{region}-dataproc.googleapis.com:443") @@ -249,7 +237,7 @@ def get_job_client(self, region: str | None = None) -> JobControllerClient: ) def get_batch_client(self, region: str | None = None) -> BatchControllerClient: - """Returns BatchControllerClient.""" + """Create a BatchControllerClient.""" client_options = None if region and region != "global": client_options = ClientOptions(api_endpoint=f"{region}-dataproc.googleapis.com:443") @@ -258,8 +246,8 @@ def get_batch_client(self, region: str | None = None) -> BatchControllerClient: credentials=self.get_credentials(), client_info=CLIENT_INFO, client_options=client_options ) - def get_operations_client(self, region): - """Returns OperationsClient.""" + def get_operations_client(self, region: str | None): + """Create a OperationsClient.""" return self.get_batch_client(region=region).transport.operations_client def wait_for_operation( @@ -267,8 +255,8 @@ def wait_for_operation( operation: Operation, timeout: float | None = None, result_retry: Retry | _MethodDefault = DEFAULT, - ): - """Waits for long-lasting operation to complete.""" + ) -> Any: + """Wait for a long-lasting operation to complete.""" try: return operation.result(timeout=timeout, retry=result_retry) except Exception: @@ -288,28 +276,30 @@ def create_cluster( retry: Retry | _MethodDefault = DEFAULT, timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), - ): - """ - Creates a cluster in a project. - - :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to. - :param region: Required. The Cloud Dataproc region in which to handle the request. - :param cluster_name: Name of the cluster to create - :param labels: Labels that will be assigned to created cluster - :param cluster_config: Required. The cluster config to create. - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.cloud.dataproc_v1.types.ClusterConfig` - :param virtual_cluster_config: Optional. The virtual cluster config, used when creating a Dataproc - cluster that does not directly control the underlying compute resources, for example, when - creating a `Dataproc-on-GKE cluster` - :class:`~google.cloud.dataproc_v1.types.VirtualClusterConfig` - :param request_id: Optional. A unique id used to identify the request. 
If the server receives two - ``CreateClusterRequest`` requests with the same id, then the second request will be ignored and - the first ``google.longrunning.Operation`` created and stored in the backend is returned. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. + ) -> Operation: + """Create a cluster in a specified project. + + :param project_id: Google Cloud project ID that the cluster belongs to. + :param region: Cloud Dataproc region in which to handle the request. + :param cluster_name: Name of the cluster to create. + :param labels: Labels that will be assigned to created cluster. + :param cluster_config: The cluster config to create. If a dict is + provided, it must be of the same form as the protobuf message + :class:`~google.cloud.dataproc_v1.types.ClusterConfig`. + :param virtual_cluster_config: The virtual cluster config, used when + creating a Dataproc cluster that does not directly control the + underlying compute resources, for example, when creating a + Dataproc-on-GKE cluster with + :class:`~google.cloud.dataproc_v1.types.VirtualClusterConfig`. + :param request_id: A unique id used to identify the request. If the + server receives two *CreateClusterRequest* requests with the same + ID, the second request will be ignored, and an operation created + for the first one and stored in the backend is returned. + :param retry: A retry object used to retry requests. If *None*, requests + will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. If *retry* is specified, the timeout applies to each + individual attempt. :param metadata: Additional metadata that is provided to the method. """ # Dataproc labels must conform to the following regex: @@ -353,22 +343,23 @@ def delete_cluster( retry: Retry | _MethodDefault = DEFAULT, timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), - ): - """ - Deletes a cluster in a project. - - :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to. - :param region: Required. The Cloud Dataproc region in which to handle the request. - :param cluster_name: Required. The cluster name. - :param cluster_uuid: Optional. Specifying the ``cluster_uuid`` means the RPC should fail - if cluster with specified UUID does not exist. - :param request_id: Optional. A unique id used to identify the request. If the server receives two - ``DeleteClusterRequest`` requests with the same id, then the second request will be ignored and - the first ``google.longrunning.Operation`` created and stored in the backend is returned. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. + ) -> Operation: + """Delete a cluster in a project. + + :param project_id: Google Cloud project ID that the cluster belongs to. + :param region: Cloud Dataproc region in which to handle the request. + :param cluster_name: Name of the cluster to delete. + :param cluster_uuid: If specified, the RPC should fail if cluster with + the UUID does not exist. + :param request_id: A unique id used to identify the request. 
If the + server receives two *DeleteClusterRequest* requests with the same + ID, the second request will be ignored, and an operation created + for the first one and stored in the backend is returned. + :param retry: A retry object used to retry requests. If *None*, requests + will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. If *retry* is specified, the timeout applies to each + individual attempt. :param metadata: Additional metadata that is provided to the method. """ client = self.get_cluster_client(region=region) @@ -395,18 +386,19 @@ def diagnose_cluster( retry: Retry | _MethodDefault = DEFAULT, timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), - ): - """ - Gets cluster diagnostic information. After the operation completes GCS uri to - diagnose is returned. - - :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to. - :param region: Required. The Cloud Dataproc region in which to handle the request. - :param cluster_name: Required. The cluster name. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. + ) -> str: + """Get cluster diagnostic information. + + After the operation completes, the GCS URI to diagnose is returned. + + :param project_id: Google Cloud project ID that the cluster belongs to. + :param region: Cloud Dataproc region in which to handle the request. + :param cluster_name: Name of the cluster. + :param retry: A retry object used to retry requests. If *None*, requests + will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. If *retry* is specified, the timeout applies to each + individual attempt. :param metadata: Additional metadata that is provided to the method. """ client = self.get_cluster_client(region=region) @@ -429,17 +421,17 @@ def get_cluster( retry: Retry | _MethodDefault = DEFAULT, timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), - ): - """ - Gets the resource representation for a cluster in a project. - - :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to. - :param region: Required. The Cloud Dataproc region in which to handle the request. - :param cluster_name: Required. The cluster name. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. + ) -> Cluster: + """Get the resource representation for a cluster in a project. + + :param project_id: Google Cloud project ID that the cluster belongs to. + :param region: Cloud Dataproc region to handle the request. + :param cluster_name: The cluster name. + :param retry: A retry object used to retry requests. If *None*, requests + will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. If *retry* is specified, the timeout applies to each + individual attempt. :param metadata: Additional metadata that is provided to the method. 
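
A minimal sketch of the cluster workflow described above, creating a cluster and blocking on the returned operation (project, region, and machine configuration are placeholders):

.. code-block:: python

    from airflow.providers.google.cloud.hooks.dataproc import DataprocHook

    hook = DataprocHook(gcp_conn_id="google_cloud_default")
    operation = hook.create_cluster(
        project_id="example-project",
        region="europe-west1",
        cluster_name="example-cluster",
        cluster_config={
            "master_config": {"num_instances": 1, "machine_type_uri": "n1-standard-4"},
            "worker_config": {"num_instances": 2, "machine_type_uri": "n1-standard-4"},
        },
        labels={"env": "dev"},
    )
    cluster = hook.wait_for_operation(operation, timeout=600.0)
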
""" client = self.get_cluster_client(region=region) @@ -462,19 +454,21 @@ def list_clusters( timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), ): - """ - Lists all regions/{region}/clusters in a project. - - :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to. - :param region: Required. The Cloud Dataproc region in which to handle the request. - :param filter_: Optional. A filter constraining the clusters to list. Filters are case-sensitive. - :param page_size: The maximum number of resources contained in the underlying API response. If page - streaming is performed per- resource, this parameter does not affect the return value. If page - streaming is performed per-page, this determines the maximum number of resources in a page. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. + """List all regions/{region}/clusters in a project. + + :param project_id: Google Cloud project ID that the cluster belongs to. + :param region: Cloud Dataproc region to handle the request. + :param filter_: To constrain the clusters to. Case-sensitive. + :param page_size: The maximum number of resources contained in the + underlying API response. If page streaming is performed + per-resource, this parameter does not affect the return value. If + page streaming is performed per-page, this determines the maximum + number of resources in a page. + :param retry: A retry object used to retry requests. If *None*, requests + will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. If *retry* is specified, the timeout applies to each + individual attempt. :param metadata: Additional metadata that is provided to the method. """ client = self.get_cluster_client(region=region) @@ -499,53 +493,56 @@ def update_cluster( retry: Retry | _MethodDefault = DEFAULT, timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), - ): - """ - Updates a cluster in a project. - - :param project_id: Required. The ID of the Google Cloud project the cluster belongs to. - :param region: Required. The Cloud Dataproc region in which to handle the request. - :param cluster_name: Required. The cluster name. - :param cluster: Required. The changes to the cluster. - - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.cloud.dataproc_v1.types.Cluster` - :param update_mask: Required. Specifies the path, relative to ``Cluster``, of the field to update. For - example, to change the number of workers in a cluster to 5, the ``update_mask`` parameter would be - specified as ``config.worker_config.num_instances``, and the ``PATCH`` request body would specify - the new value, as follows: - - :: - - { "config":{ "workerConfig":{ "numInstances":"5" } } } - - Similarly, to change the number of preemptible workers in a cluster to 5, the ``update_mask`` - parameter would be ``config.secondary_worker_config.num_instances``, and the ``PATCH`` request - body would be set as follows: - - :: - - { "config":{ "secondaryWorkerConfig":{ "numInstances":"5" } } } - - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.cloud.dataproc_v1.types.FieldMask` - :param graceful_decommission_timeout: Optional. 
Timeout for graceful YARN decommissioning. Graceful - decommissioning allows removing nodes from the cluster without interrupting jobs in progress. - Timeout specifies how long to wait for jobs in progress to finish before forcefully removing nodes - (and potentially interrupting jobs). Default timeout is 0 (for forceful decommission), and the - maximum allowed timeout is 1 day. + ) -> Operation: + """Update a cluster in a project. + + :param project_id: Google Cloud project ID that the cluster belongs to. + :param region: Cloud Dataproc region to handle the request. + :param cluster_name: The cluster name. + :param cluster: Changes to the cluster. If a dict is provided, it must + be of the same form as the protobuf message + :class:`~google.cloud.dataproc_v1.types.Cluster`. + :param update_mask: Specifies the path, relative to ``Cluster``, of the + field to update. For example, to change the number of workers in a + cluster to 5, this would be specified as + ``config.worker_config.num_instances``, and the ``PATCH`` request + body would specify the new value: + + .. code-block:: python + + {"config": {"workerConfig": {"numInstances": "5"}}} + + Similarly, to change the number of preemptible workers in a cluster + to 5, this would be ``config.secondary_worker_config.num_instances`` + and the ``PATCH`` request body would be: + + .. code-block:: python + + {"config": {"secondaryWorkerConfig": {"numInstances": "5"}}} + + If a dict is provided, it must be of the same form as the protobuf + message :class:`~google.cloud.dataproc_v1.types.FieldMask`. + :param graceful_decommission_timeout: Timeout for graceful YARN + decommissioning. Graceful decommissioning allows removing nodes from + the cluster without interrupting jobs in progress. Timeout specifies + how long to wait for jobs in progress to finish before forcefully + removing nodes (and potentially interrupting jobs). Default timeout + is 0 (for forceful decommission), and the maximum allowed timeout is + one day. Only supported on Dataproc image versions 1.2 and higher. - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.cloud.dataproc_v1.types.Duration` - :param request_id: Optional. A unique id used to identify the request. If the server receives two - ``UpdateClusterRequest`` requests with the same id, then the second request will be ignored and - the first ``google.longrunning.Operation`` created and stored in the backend is returned. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. + If a dict is provided, it must be of the same form as the protobuf + message :class:`~google.cloud.dataproc_v1.types.Duration`. + :param request_id: A unique id used to identify the request. If the + server receives two *UpdateClusterRequest* requests with the same + ID, the second request will be ignored, and an operation created + for the first one and stored in the backend is returned. + :param retry: A retry object used to retry requests. If *None*, requests + will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. If *retry* is specified, the timeout applies to each + individual attempt. :param metadata: Additional metadata that is provided to the method. 
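
For instance, resizing the worker pool exactly as the ``update_mask`` examples above describe might look like this (reusing the ``hook`` from the previous sketch; names are placeholders):

.. code-block:: python

    operation = hook.update_cluster(
        project_id="example-project",
        region="europe-west1",
        cluster_name="example-cluster",
        cluster={"config": {"worker_config": {"num_instances": 5}}},
        update_mask={"paths": ["config.worker_config.num_instances"]},
    )
    hook.wait_for_operation(operation)
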
""" if region is None: @@ -577,17 +574,18 @@ def create_workflow_template( timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), ) -> WorkflowTemplate: - """ - Creates new workflow template. - - :param project_id: Required. The ID of the Google Cloud project the cluster belongs to. - :param region: Required. The Cloud Dataproc region in which to handle the request. - :param template: The Dataproc workflow template to create. If a dict is provided, - it must be of the same form as the protobuf message WorkflowTemplate. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. + """Create a new workflow template. + + :param project_id: Google Cloud project ID that the cluster belongs to. + :param region: Cloud Dataproc region to handle the request. + :param template: The Dataproc workflow template to create. If a dict is + provided, it must be of the same form as the protobuf message + WorkflowTemplate. + :param retry: A retry object used to retry requests. If *None*, requests + will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. If *retry* is specified, the timeout applies to each + individual attempt. :param metadata: Additional metadata that is provided to the method. """ if region is None: @@ -611,27 +609,27 @@ def instantiate_workflow_template( retry: Retry | _MethodDefault = DEFAULT, timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), - ): - """ - Instantiates a template and begins execution. + ) -> Operation: + """Instantiate a template and begins execution. :param template_name: Name of template to instantiate. - :param project_id: Required. The ID of the Google Cloud project the cluster belongs to. - :param region: Required. The Cloud Dataproc region in which to handle the request. - :param version: Optional. The version of workflow template to instantiate. If specified, - the workflow will be instantiated only if the current version of - the workflow template has the supplied version. - This option cannot be used to instantiate a previous version of - workflow template. - :param request_id: Optional. A tag that prevents multiple concurrent workflow instances - with the same tag from running. This mitigates risk of concurrent - instances started due to retries. - :param parameters: Optional. Map from parameter names to values that should be used for those - parameters. Values may not exceed 100 characters. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. + :param project_id: Google Cloud project ID that the cluster belongs to. + :param region: Cloud Dataproc region to handle the request. + :param version: Version of workflow template to instantiate. If + specified, the workflow will be instantiated only if the current + version of the workflow template has the supplied version. This + option cannot be used to instantiate a previous version of workflow + template. + :param request_id: A tag that prevents multiple concurrent workflow + instances with the same tag from running. 
This mitigates risk of + concurrent instances started due to retries. + :param parameters: Map from parameter names to values that should be + used for those parameters. Values may not exceed 100 characters. + :param retry: A retry object used to retry requests. If *None*, requests + will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. If *retry* is specified, the timeout applies to each + individual attempt. :param metadata: Additional metadata that is provided to the method. """ if region is None: @@ -657,21 +655,22 @@ def instantiate_inline_workflow_template( retry: Retry | _MethodDefault = DEFAULT, timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), - ): - """ - Instantiates a template and begins execution. - - :param template: The workflow template to instantiate. If a dict is provided, - it must be of the same form as the protobuf message WorkflowTemplate - :param project_id: Required. The ID of the Google Cloud project the cluster belongs to. - :param region: Required. The Cloud Dataproc region in which to handle the request. - :param request_id: Optional. A tag that prevents multiple concurrent workflow instances - with the same tag from running. This mitigates risk of concurrent - instances started due to retries. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. + ) -> Operation: + """Instantiate a template and begin execution. + + :param template: The workflow template to instantiate. If a dict is + provided, it must be of the same form as the protobuf message + WorkflowTemplate. + :param project_id: Google Cloud project ID that the cluster belongs to. + :param region: Cloud Dataproc region to handle the request. + :param request_id: A tag that prevents multiple concurrent workflow + instances with the same tag from running. This mitigates risk of + concurrent instances started due to retries. + :param retry: A retry object used to retry requests. If *None*, requests + will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. If *retry* is specified, the timeout applies to each + individual attempt. :param metadata: Additional metadata that is provided to the method. """ if region is None: @@ -696,14 +695,13 @@ def wait_for_job( wait_time: int = 10, timeout: int | None = None, ) -> None: - """ - Helper method which polls a job to check if it finishes. + """Poll a job to check if it has finished. - :param job_id: Id of the Dataproc job - :param project_id: Required. The ID of the Google Cloud project the cluster belongs to. - :param region: Required. The Cloud Dataproc region in which to handle the request. - :param wait_time: Number of seconds between checks - :param timeout: How many seconds wait for job to be ready. Used only if ``asynchronous`` is False + :param job_id: Dataproc job ID. + :param project_id: Google Cloud project ID that the cluster belongs to. + :param region: Cloud Dataproc region to handle the request. + :param wait_time: Number of seconds between checks. + :param timeout: How many seconds wait for job to be ready. 
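
Polling a submitted job with the method above could look like the following sketch (the job ID, project, and region are placeholders):

.. code-block:: python

    hook.wait_for_job(
        job_id="example-job-id",
        project_id="example-project",
        region="europe-west1",
        wait_time=15,
        timeout=1800,
    )
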
""" if region is None: raise TypeError("missing 1 required keyword argument: 'region'") @@ -734,16 +732,16 @@ def get_job( timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), ) -> Job: - """ - Gets the resource representation for a job in a project. - - :param job_id: Id of the Dataproc job - :param project_id: Required. The ID of the Google Cloud project the cluster belongs to. - :param region: Required. The Cloud Dataproc region in which to handle the request. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. + """Get the resource representation for a job in a project. + + :param job_id: Dataproc job ID. + :param project_id: Google Cloud project ID that the cluster belongs to. + :param region: Cloud Dataproc region to handle the request. + :param retry: A retry object used to retry requests. If *None*, requests + will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. If *retry* is specified, the timeout applies to each + individual attempt. :param metadata: Additional metadata that is provided to the method. """ if region is None: @@ -768,20 +766,20 @@ def submit_job( timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), ) -> Job: - """ - Submits a job to a cluster. - - :param job: The job resource. If a dict is provided, - it must be of the same form as the protobuf message Job - :param project_id: Required. The ID of the Google Cloud project the cluster belongs to. - :param region: Required. The Cloud Dataproc region in which to handle the request. - :param request_id: Optional. A tag that prevents multiple concurrent workflow instances - with the same tag from running. This mitigates risk of concurrent - instances started due to retries. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. + """Submit a job to a cluster. + + :param job: The job resource. If a dict is provided, it must be of the + same form as the protobuf message Job. + :param project_id: Google Cloud project ID that the cluster belongs to. + :param region: Cloud Dataproc region to handle the request. + :param request_id: A tag that prevents multiple concurrent workflow + instances with the same tag from running. This mitigates risk of + concurrent instances started due to retries. + :param retry: A retry object used to retry requests. If *None*, requests + will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. If *retry* is specified, the timeout applies to each + individual attempt. :param metadata: Additional metadata that is provided to the method. """ if region is None: @@ -804,16 +802,16 @@ def cancel_job( timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), ) -> Job: - """ - Starts a job cancellation request. - - :param project_id: Required. The ID of the Google Cloud project that the job belongs to. - :param region: Required. The Cloud Dataproc region in which to handle the request. - :param job_id: Required. The job ID. - :param retry: A retry object used to retry requests. 
If ``None`` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. + """Start a job cancellation request. + + :param project_id: Google Cloud project ID that the cluster belongs to. + :param region: Cloud Dataproc region to handle the request. + :param job_id: The job ID. + :param retry: A retry object used to retry requests. If *None*, requests + will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. If *retry* is specified, the timeout applies to each + individual attempt. :param metadata: Additional metadata that is provided to the method. """ client = self.get_job_client(region=region) @@ -838,22 +836,23 @@ def create_batch( timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), ) -> Operation: - """ - Creates a batch workload. - - :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to. - :param region: Required. The Cloud Dataproc region in which to handle the request. - :param batch: Required. The batch to create. - :param batch_id: Optional. The ID to use for the batch, which will become the final component - of the batch's resource name. - This value must be 4-63 characters. Valid characters are /[a-z][0-9]-/. - :param request_id: Optional. A unique id used to identify the request. If the server receives two - ``CreateBatchRequest`` requests with the same id, then the second request will be ignored and - the first ``google.longrunning.Operation`` created and stored in the backend is returned. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. + """Create a batch workload. + + :param project_id: Google Cloud project ID that the cluster belongs to. + :param region: Cloud Dataproc region to handle the request. + :param batch: The batch to create. + :param batch_id: The ID to use for the batch, which will become the + final component of the batch's resource name. This value must be of + 4-63 characters. Valid characters are ``[a-z][0-9]-``. + :param request_id: A unique id used to identify the request. If the + server receives two *CreateBatchRequest* requests with the same + ID, the second request will be ignored, and an operation created + for the first one and stored in the backend is returned. + :param retry: A retry object used to retry requests. If *None*, requests + will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. If *retry* is specified, the timeout applies to each + individual attempt. :param metadata: Additional metadata that is provided to the method. """ client = self.get_batch_client(region) @@ -882,18 +881,16 @@ def delete_batch( timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), ) -> None: - """ - Deletes the batch workload resource. - - :param batch_id: Required. The ID to use for the batch, which will become the final component - of the batch's resource name. - This value must be 4-63 characters. Valid characters are /[a-z][0-9]-/. - :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to. - :param region: Required. 
The Cloud Dataproc region in which to handle the request. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. + """Delete the batch workload resource. + + :param batch_id: The batch ID. + :param project_id: Google Cloud project ID that the cluster belongs to. + :param region: Cloud Dataproc region to handle the request. + :param retry: A retry object used to retry requests. If *None*, requests + will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. If *retry* is specified, the timeout applies to each + individual attempt. :param metadata: Additional metadata that is provided to the method. """ client = self.get_batch_client(region) @@ -918,18 +915,16 @@ def get_batch( timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), ) -> Batch: - """ - Gets the batch workload resource representation. - - :param batch_id: Required. The ID to use for the batch, which will become the final component - of the batch's resource name. - This value must be 4-63 characters. Valid characters are /[a-z][0-9]-/. - :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to. - :param region: Required. The Cloud Dataproc region in which to handle the request. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. + """Get the batch workload resource representation. + + :param batch_id: The batch ID. + :param project_id: Google Cloud project ID that the cluster belongs to. + :param region: Cloud Dataproc region to handle the request. + :param retry: A retry object used to retry requests. If *None*, requests + will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. If *retry* is specified, the timeout applies to each + individual attempt. :param metadata: Additional metadata that is provided to the method. """ client = self.get_batch_client(region) @@ -956,19 +951,20 @@ def list_batches( timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), ): - """ - Lists batch workloads. - - :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to. - :param region: Required. The Cloud Dataproc region in which to handle the request. - :param page_size: Optional. The maximum number of batches to return in each response. The service may - return fewer than this value. The default page size is 20; the maximum page size is 1000. - :param page_token: Optional. A page token received from a previous ``ListBatches`` call. - Provide this token to retrieve the subsequent page. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. + """List batch workloads. + + :param project_id: Google Cloud project ID that the cluster belongs to. + :param region: Cloud Dataproc region to handle the request. 
+ :param page_size: The maximum number of batches to return in each + response. The service may return fewer than this value. The default + page size is 20; the maximum page size is 1000. + :param page_token: A page token received from a previous ``ListBatches`` + call. Provide this token to retrieve the subsequent page. + :param retry: A retry object used to retry requests. If *None*, requests + will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. If *retry* is specified, the timeout applies to each + individual attempt. :param metadata: Additional metadata that is provided to the method. """ client = self.get_batch_client(region) @@ -997,24 +993,24 @@ def wait_for_batch( timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), ) -> Batch: - """ - Wait for a Batch job to complete. - - After Batch job submission, the operator will wait for the job to complete, however, this is useful - in the case where Airflow is restarted or the task pid is killed for any reason. In this case, the - Batch create will happen again, AlreadyExists will be raised and caught, then should fall to this - function for waiting on completion. - - :param batch_id: Required. The ID to use for the batch, which will become the final component - of the batch's resource name. - This value must be 4-63 characters. Valid characters are /[a-z][0-9]-/. - :param region: Required. The Cloud Dataproc region in which to handle the request. - :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to. - :param wait_check_interval: The amount of time to pause between checks for job completion - :param retry: A retry object used to retry requests to get_batch. - If ``None`` is specified, requests will not be retried. - :param timeout: The amount of time, in seconds, to wait for the create_batch request to complete. - Note that if ``retry`` is specified, the timeout applies to each individual attempt. + """Wait for a batch job to complete. + + After submission of a batch job, the operator waits for the job to + complete. This method is, however, useful in the case when Airflow is + restarted or the task pid is killed for any reason. In this case, the + creation would happen again; the raised AlreadyExists is caught, and + control falls to this function to wait for completion. + + :param batch_id: The batch ID. + :param region: Cloud Dataproc region to handle the request. + :param project_id: Google Cloud project ID that the cluster belongs to. + :param wait_check_interval: The amount of time to pause between checks + for job completion. + :param retry: A retry object used to retry requests. If *None*, requests + will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. If *retry* is specified, the timeout applies to each + individual attempt. :param metadata: Additional metadata that is provided to the method. """ state = None @@ -1050,8 +1046,7 @@ def wait_for_batch( class DataprocAsyncHook(GoogleBaseHook): - """ - Asynchronous Hook for Google Cloud Dataproc APIs. + """Asynchronous interaction with Google Cloud Dataproc APIs. All the methods in the hook where project_id is used must be called with keyword arguments rather than positional.
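For illustration only, a minimal sketch of how the batch helpers touched above are typically combined; the connection ID, project, region, batch definition and batch ID below are placeholders, not values from this change.

.. code-block:: python

    from google.api_core.exceptions import AlreadyExists

    from airflow.providers.google.cloud.hooks.dataproc import DataprocHook

    hook = DataprocHook(gcp_conn_id="google_cloud_default")  # placeholder connection
    try:
        # create_batch returns a long-running operation; block until it finishes.
        operation = hook.create_batch(
            project_id="example-project",
            region="europe-west1",
            batch={"pyspark_batch": {"main_python_file_uri": "gs://example-bucket/job.py"}},
            batch_id="example-batch",
        )
        operation.result()
    except AlreadyExists:
        # An earlier, retried attempt already created the batch, so fall
        # through to polling it until it reaches a terminal state.
        hook.wait_for_batch(
            batch_id="example-batch",
            region="europe-west1",
            project_id="example-project",
        )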
@@ -1072,7 +1067,7 @@ def __init__( self._cached_client: JobControllerAsyncClient | None = None def get_cluster_client(self, region: str | None = None) -> ClusterControllerAsyncClient: - """Returns ClusterControllerAsyncClient.""" + """Create a ClusterControllerAsyncClient.""" client_options = None if region and region != "global": client_options = ClientOptions(api_endpoint=f"{region}-dataproc.googleapis.com:443") @@ -1082,7 +1077,7 @@ def get_cluster_client(self, region: str | None = None) -> ClusterControllerAsyn ) def get_template_client(self, region: str | None = None) -> WorkflowTemplateServiceAsyncClient: - """Returns WorkflowTemplateServiceAsyncClient.""" + """Create a WorkflowTemplateServiceAsyncClient.""" client_options = None if region and region != "global": client_options = ClientOptions(api_endpoint=f"{region}-dataproc.googleapis.com:443") @@ -1092,7 +1087,7 @@ def get_template_client(self, region: str | None = None) -> WorkflowTemplateServ ) def get_job_client(self, region: str | None = None) -> JobControllerAsyncClient: - """Returns JobControllerAsyncClient.""" + """Create a JobControllerAsyncClient.""" if self._cached_client is None: client_options = None if region and region != "global": @@ -1106,7 +1101,7 @@ def get_job_client(self, region: str | None = None) -> JobControllerAsyncClient: return self._cached_client def get_batch_client(self, region: str | None = None) -> BatchControllerAsyncClient: - """Returns BatchControllerAsyncClient.""" + """Create a BatchControllerAsyncClient.""" client_options = None if region and region != "global": client_options = ClientOptions(api_endpoint=f"{region}-dataproc.googleapis.com:443") @@ -1116,7 +1111,7 @@ def get_batch_client(self, region: str | None = None) -> BatchControllerAsyncCli ) def get_operations_client(self, region: str) -> OperationsClient: - """Returns OperationsClient.""" + """Create a OperationsClient.""" return self.get_template_client(region=region).transport.operations_client @GoogleBaseHook.fallback_to_default_project_id @@ -1132,28 +1127,30 @@ async def create_cluster( retry: Retry | _MethodDefault = DEFAULT, timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), - ): - """ - Creates a cluster in a project. - - :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to. - :param region: Required. The Cloud Dataproc region in which to handle the request. - :param cluster_name: Name of the cluster to create - :param labels: Labels that will be assigned to created cluster - :param cluster_config: Required. The cluster config to create. - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.cloud.dataproc_v1.types.ClusterConfig` - :param virtual_cluster_config: Optional. The virtual cluster config, used when creating a Dataproc - cluster that does not directly control the underlying compute resources, for example, when - creating a `Dataproc-on-GKE cluster` - :class:`~google.cloud.dataproc_v1.types.VirtualClusterConfig` - :param request_id: Optional. A unique id used to identify the request. If the server receives two - ``CreateClusterRequest`` requests with the same id, then the second request will be ignored and - the first ``google.longrunning.Operation`` created and stored in the backend is returned. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. 
Note that if - ``retry`` is specified, the timeout applies to each individual attempt. + ) -> AsyncOperation: + """Create a cluster in a project. + + :param project_id: Google Cloud project ID that the cluster belongs to. + :param region: Cloud Dataproc region in which to handle the request. + :param cluster_name: Name of the cluster to create. + :param labels: Labels that will be assigned to created cluster. + :param cluster_config: The cluster config to create. If a dict is + provided, it must be of the same form as the protobuf message + :class:`~google.cloud.dataproc_v1.types.ClusterConfig`. + :param virtual_cluster_config: The virtual cluster config, used when + creating a Dataproc cluster that does not directly control the + underlying compute resources, for example, when creating a + Dataproc-on-GKE cluster with + :class:`~google.cloud.dataproc_v1.types.VirtualClusterConfig`. + :param request_id: A unique id used to identify the request. If the + server receives two *CreateClusterRequest* requests with the same + ID, the second request will be ignored, and an operation created + for the first one and stored in the backend is returned. + :param retry: A retry object used to retry requests. If *None*, requests + will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. If *retry* is specified, the timeout applies to each + individual attempt. :param metadata: Additional metadata that is provided to the method. """ # Dataproc labels must conform to the following regex: @@ -1197,26 +1194,27 @@ async def delete_cluster( retry: Retry | _MethodDefault = DEFAULT, timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), - ): - """ - Deletes a cluster in a project. - - :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to. - :param region: Required. The Cloud Dataproc region in which to handle the request. - :param cluster_name: Required. The cluster name. - :param cluster_uuid: Optional. Specifying the ``cluster_uuid`` means the RPC should fail - if cluster with specified UUID does not exist. - :param request_id: Optional. A unique id used to identify the request. If the server receives two - ``DeleteClusterRequest`` requests with the same id, then the second request will be ignored and - the first ``google.longrunning.Operation`` created and stored in the backend is returned. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. + ) -> AsyncOperation: + """Delete a cluster in a project. + + :param project_id: Google Cloud project ID that the cluster belongs to. + :param region: Cloud Dataproc region in which to handle the request. + :param cluster_name: Name of the cluster to delete. + :param cluster_uuid: If specified, the RPC should fail if cluster with + the UUID does not exist. + :param request_id: A unique id used to identify the request. If the + server receives two *DeleteClusterRequest* requests with the same + ID, the second request will be ignored, and an operation created + for the first one and stored in the backend is returned. + :param retry: A retry object used to retry requests. If *None*, requests + will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. 
If *retry* is specified, the timeout applies to each + individual attempt. :param metadata: Additional metadata that is provided to the method. """ client = self.get_cluster_client(region=region) - result = client.delete_cluster( + result = await client.delete_cluster( request={ "project_id": project_id, "region": region, @@ -1239,18 +1237,19 @@ async def diagnose_cluster( retry: Retry | _MethodDefault = DEFAULT, timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), - ): - """ - Gets cluster diagnostic information. After the operation completes GCS uri to - diagnose is returned. - - :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to. - :param region: Required. The Cloud Dataproc region in which to handle the request. - :param cluster_name: Required. The cluster name. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. + ) -> str: + """Get cluster diagnostic information. + + After the operation completes, the GCS URI to diagnose is returned. + + :param project_id: Google Cloud project ID that the cluster belongs to. + :param region: Cloud Dataproc region in which to handle the request. + :param cluster_name: Name of the cluster. + :param retry: A retry object used to retry requests. If *None*, requests + will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. If *retry* is specified, the timeout applies to each + individual attempt. :param metadata: Additional metadata that is provided to the method. """ client = self.get_cluster_client(region=region) @@ -1273,17 +1272,17 @@ async def get_cluster( retry: Retry | _MethodDefault = DEFAULT, timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), - ): - """ - Gets the resource representation for a cluster in a project. - - :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to. - :param region: Required. The Cloud Dataproc region in which to handle the request. - :param cluster_name: Required. The cluster name. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. + ) -> Cluster: + """Get the resource representation for a cluster in a project. + + :param project_id: Google Cloud project ID that the cluster belongs to. + :param region: Cloud Dataproc region to handle the request. + :param cluster_name: The cluster name. + :param retry: A retry object used to retry requests. If *None*, requests + will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. If *retry* is specified, the timeout applies to each + individual attempt. :param metadata: Additional metadata that is provided to the method. """ client = self.get_cluster_client(region=region) @@ -1306,19 +1305,21 @@ async def list_clusters( timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), ): - """ - Lists all regions/{region}/clusters in a project. - - :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to. - :param region: Required. 
The Cloud Dataproc region in which to handle the request. - :param filter_: Optional. A filter constraining the clusters to list. Filters are case-sensitive. - :param page_size: The maximum number of resources contained in the underlying API response. If page - streaming is performed per- resource, this parameter does not affect the return value. If page - streaming is performed per-page, this determines the maximum number of resources in a page. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. + """List all regions/{region}/clusters in a project. + + :param project_id: Google Cloud project ID that the cluster belongs to. + :param region: Cloud Dataproc region to handle the request. + :param filter_: A filter to constrain the clusters to list. Case-sensitive. + :param page_size: The maximum number of resources contained in the + underlying API response. If page streaming is performed + per-resource, this parameter does not affect the return value. If + page streaming is performed per-page, this determines the maximum + number of resources in a page. + :param retry: A retry object used to retry requests. If *None*, requests + will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. If *retry* is specified, the timeout applies to each + individual attempt. :param metadata: Additional metadata that is provided to the method. """ client = self.get_cluster_client(region=region) @@ -1343,53 +1344,56 @@ async def update_cluster( retry: Retry | _MethodDefault = DEFAULT, timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), - ): - """ - Updates a cluster in a project. - - :param project_id: Required. The ID of the Google Cloud project the cluster belongs to. - :param region: Required. The Cloud Dataproc region in which to handle the request. - :param cluster_name: Required. The cluster name. - :param cluster: Required. The changes to the cluster. - - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.cloud.dataproc_v1.types.Cluster` - :param update_mask: Required. Specifies the path, relative to ``Cluster``, of the field to update. For - example, to change the number of workers in a cluster to 5, the ``update_mask`` parameter would be - specified as ``config.worker_config.num_instances``, and the ``PATCH`` request body would specify - the new value, as follows: - - :: - - { "config":{ "workerConfig":{ "numInstances":"5" } } } - - Similarly, to change the number of preemptible workers in a cluster to 5, the ``update_mask`` - parameter would be ``config.secondary_worker_config.num_instances``, and the ``PATCH`` request - body would be set as follows: - - :: - - { "config":{ "secondaryWorkerConfig":{ "numInstances":"5" } } } - - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.cloud.dataproc_v1.types.FieldMask` - :param graceful_decommission_timeout: Optional. Timeout for graceful YARN decommissioning. Graceful - decommissioning allows removing nodes from the cluster without interrupting jobs in progress. - Timeout specifies how long to wait for jobs in progress to finish before forcefully removing nodes - (and potentially interrupting jobs).
Default timeout is 0 (for forceful decommission), and the - maximum allowed timeout is 1 day. + ) -> AsyncOperation: + """Update a cluster in a project. + + :param project_id: Google Cloud project ID that the cluster belongs to. + :param region: Cloud Dataproc region to handle the request. + :param cluster_name: The cluster name. + :param cluster: Changes to the cluster. If a dict is provided, it must + be of the same form as the protobuf message + :class:`~google.cloud.dataproc_v1.types.Cluster`. + :param update_mask: Specifies the path, relative to ``Cluster``, of the + field to update. For example, to change the number of workers in a + cluster to 5, this would be specified as + ``config.worker_config.num_instances``, and the ``PATCH`` request + body would specify the new value: + + .. code-block:: python + + {"config": {"workerConfig": {"numInstances": "5"}}} + + Similarly, to change the number of preemptible workers in a cluster + to 5, this would be ``config.secondary_worker_config.num_instances`` + and the ``PATCH`` request body would be: + + .. code-block:: python + + {"config": {"secondaryWorkerConfig": {"numInstances": "5"}}} + + If a dict is provided, it must be of the same form as the protobuf + message :class:`~google.cloud.dataproc_v1.types.FieldMask`. + :param graceful_decommission_timeout: Timeout for graceful YARN + decommissioning. Graceful decommissioning allows removing nodes from + the cluster without interrupting jobs in progress. Timeout specifies + how long to wait for jobs in progress to finish before forcefully + removing nodes (and potentially interrupting jobs). Default timeout + is 0 (for forceful decommission), and the maximum allowed timeout is + one day. Only supported on Dataproc image versions 1.2 and higher. - If a dict is provided, it must be of the same form as the protobuf message - :class:`~google.cloud.dataproc_v1.types.Duration` - :param request_id: Optional. A unique id used to identify the request. If the server receives two - ``UpdateClusterRequest`` requests with the same id, then the second request will be ignored and - the first ``google.longrunning.Operation`` created and stored in the backend is returned. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. + If a dict is provided, it must be of the same form as the protobuf + message :class:`~google.cloud.dataproc_v1.types.Duration`. + :param request_id: A unique id used to identify the request. If the + server receives two *UpdateClusterRequest* requests with the same + ID, the second request will be ignored, and an operation created + for the first one and stored in the backend is returned. + :param retry: A retry object used to retry requests. If *None*, requests + will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. If *retry* is specified, the timeout applies to each + individual attempt. :param metadata: Additional metadata that is provided to the method. """ if region is None: @@ -1421,17 +1425,18 @@ async def create_workflow_template( timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), ) -> WorkflowTemplate: - """ - Creates new workflow template. - - :param project_id: Required. The ID of the Google Cloud project the cluster belongs to. - :param region: Required. 
The Cloud Dataproc region in which to handle the request. - :param template: The Dataproc workflow template to create. If a dict is provided, - it must be of the same form as the protobuf message WorkflowTemplate. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. + """Create a new workflow template. + + :param project_id: Google Cloud project ID that the cluster belongs to. + :param region: Cloud Dataproc region to handle the request. + :param template: The Dataproc workflow template to create. If a dict is + provided, it must be of the same form as the protobuf message + WorkflowTemplate. + :param retry: A retry object used to retry requests. If *None*, requests + will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. If *retry* is specified, the timeout applies to each + individual attempt. :param metadata: Additional metadata that is provided to the method. """ if region is None: @@ -1455,27 +1460,27 @@ async def instantiate_workflow_template( retry: Retry | _MethodDefault = DEFAULT, timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), - ): - """ - Instantiates a template and begins execution. + ) -> AsyncOperation: + """Instantiate a template and begins execution. :param template_name: Name of template to instantiate. - :param project_id: Required. The ID of the Google Cloud project the cluster belongs to. - :param region: Required. The Cloud Dataproc region in which to handle the request. - :param version: Optional. The version of workflow template to instantiate. If specified, - the workflow will be instantiated only if the current version of - the workflow template has the supplied version. - This option cannot be used to instantiate a previous version of - workflow template. - :param request_id: Optional. A tag that prevents multiple concurrent workflow instances - with the same tag from running. This mitigates risk of concurrent - instances started due to retries. - :param parameters: Optional. Map from parameter names to values that should be used for those - parameters. Values may not exceed 100 characters. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. + :param project_id: Google Cloud project ID that the cluster belongs to. + :param region: Cloud Dataproc region to handle the request. + :param version: Version of workflow template to instantiate. If + specified, the workflow will be instantiated only if the current + version of the workflow template has the supplied version. This + option cannot be used to instantiate a previous version of workflow + template. + :param request_id: A tag that prevents multiple concurrent workflow + instances with the same tag from running. This mitigates risk of + concurrent instances started due to retries. + :param parameters: Map from parameter names to values that should be + used for those parameters. Values may not exceed 100 characters. + :param retry: A retry object used to retry requests. If *None*, requests + will not be retried. 
+ :param timeout: The amount of time, in seconds, to wait for the request + to complete. If *retry* is specified, the timeout applies to each + individual attempt. :param metadata: Additional metadata that is provided to the method. """ if region is None: @@ -1501,21 +1506,22 @@ async def instantiate_inline_workflow_template( retry: Retry | _MethodDefault = DEFAULT, timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), - ): - """ - Instantiates a template and begins execution. - - :param template: The workflow template to instantiate. If a dict is provided, - it must be of the same form as the protobuf message WorkflowTemplate - :param project_id: Required. The ID of the Google Cloud project the cluster belongs to. - :param region: Required. The Cloud Dataproc region in which to handle the request. - :param request_id: Optional. A tag that prevents multiple concurrent workflow instances - with the same tag from running. This mitigates risk of concurrent - instances started due to retries. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. + ) -> AsyncOperation: + """Instantiate a template and begin execution. + + :param template: The workflow template to instantiate. If a dict is + provided, it must be of the same form as the protobuf message + WorkflowTemplate. + :param project_id: Google Cloud project ID that the cluster belongs to. + :param region: Cloud Dataproc region to handle the request. + :param request_id: A tag that prevents multiple concurrent workflow + instances with the same tag from running. This mitigates risk of + concurrent instances started due to retries. + :param retry: A retry object used to retry requests. If *None*, requests + will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. If *retry* is specified, the timeout applies to each + individual attempt. :param metadata: Additional metadata that is provided to the method. """ if region is None: @@ -1544,16 +1550,16 @@ async def get_job( timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), ) -> Job: - """ - Gets the resource representation for a job in a project. - - :param job_id: Id of the Dataproc job - :param project_id: Required. The ID of the Google Cloud project the cluster belongs to. - :param region: Required. The Cloud Dataproc region in which to handle the request. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. + """Get the resource representation for a job in a project. + + :param job_id: Dataproc job ID. + :param project_id: Google Cloud project ID that the cluster belongs to. + :param region: Cloud Dataproc region to handle the request. + :param retry: A retry object used to retry requests. If *None*, requests + will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. If *retry* is specified, the timeout applies to each + individual attempt. :param metadata: Additional metadata that is provided to the method. 
""" if region is None: @@ -1578,20 +1584,20 @@ async def submit_job( timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), ) -> Job: - """ - Submits a job to a cluster. - - :param job: The job resource. If a dict is provided, - it must be of the same form as the protobuf message Job - :param project_id: Required. The ID of the Google Cloud project the cluster belongs to. - :param region: Required. The Cloud Dataproc region in which to handle the request. - :param request_id: Optional. A tag that prevents multiple concurrent workflow instances - with the same tag from running. This mitigates risk of concurrent - instances started due to retries. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. + """Submit a job to a cluster. + + :param job: The job resource. If a dict is provided, it must be of the + same form as the protobuf message Job. + :param project_id: Google Cloud project ID that the cluster belongs to. + :param region: Cloud Dataproc region to handle the request. + :param request_id: A tag that prevents multiple concurrent workflow + instances with the same tag from running. This mitigates risk of + concurrent instances started due to retries. + :param retry: A retry object used to retry requests. If *None*, requests + will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. If *retry* is specified, the timeout applies to each + individual attempt. :param metadata: Additional metadata that is provided to the method. """ if region is None: @@ -1614,16 +1620,16 @@ async def cancel_job( timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), ) -> Job: - """ - Starts a job cancellation request. - - :param project_id: Required. The ID of the Google Cloud project that the job belongs to. - :param region: Required. The Cloud Dataproc region in which to handle the request. - :param job_id: Required. The job ID. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. + """Start a job cancellation request. + + :param project_id: Google Cloud project ID that the cluster belongs to. + :param region: Cloud Dataproc region to handle the request. + :param job_id: The job ID. + :param retry: A retry object used to retry requests. If *None*, requests + will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. If *retry* is specified, the timeout applies to each + individual attempt. :param metadata: Additional metadata that is provided to the method. """ client = self.get_job_client(region=region) @@ -1648,22 +1654,23 @@ async def create_batch( timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), ) -> AsyncOperation: - """ - Creates a batch workload. - - :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to. - :param region: Required. The Cloud Dataproc region in which to handle the request. - :param batch: Required. The batch to create. - :param batch_id: Optional. 
The ID to use for the batch, which will become the final component - of the batch's resource name. - This value must be 4-63 characters. Valid characters are /[a-z][0-9]-/. - :param request_id: Optional. A unique id used to identify the request. If the server receives two - ``CreateBatchRequest`` requests with the same id, then the second request will be ignored and - the first ``google.longrunning.Operation`` created and stored in the backend is returned. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. + """Create a batch workload. + + :param project_id: Google Cloud project ID that the cluster belongs to. + :param region: Cloud Dataproc region to handle the request. + :param batch: The batch to create. + :param batch_id: The ID to use for the batch, which will become the + final component of the batch's resource name. This value must be of + 4-63 characters. Valid characters are ``[a-z][0-9]-``. + :param request_id: A unique id used to identify the request. If the + server receives two *CreateBatchRequest* requests with the same + ID, the second request will be ignored, and an operation created + for the first one and stored in the backend is returned. + :param retry: A retry object used to retry requests. If *None*, requests + will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. If *retry* is specified, the timeout applies to each + individual attempt. :param metadata: Additional metadata that is provided to the method. """ client = self.get_batch_client(region) @@ -1692,18 +1699,16 @@ async def delete_batch( timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), ) -> None: - """ - Deletes the batch workload resource. - - :param batch_id: Required. The ID to use for the batch, which will become the final component - of the batch's resource name. - This value must be 4-63 characters. Valid characters are /[a-z][0-9]-/. - :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to. - :param region: Required. The Cloud Dataproc region in which to handle the request. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. + """Delete the batch workload resource. + + :param batch_id: The batch ID. + :param project_id: Google Cloud project ID that the cluster belongs to. + :param region: Cloud Dataproc region to handle the request. + :param retry: A retry object used to retry requests. If *None*, requests + will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. If *retry* is specified, the timeout applies to each + individual attempt. :param metadata: Additional metadata that is provided to the method. """ client = self.get_batch_client(region) @@ -1728,18 +1733,16 @@ async def get_batch( timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), ) -> Batch: - """ - Gets the batch workload resource representation. - - :param batch_id: Required. The ID to use for the batch, which will become the final component - of the batch's resource name. 
- This value must be 4-63 characters. Valid characters are /[a-z][0-9]-/. - :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to. - :param region: Required. The Cloud Dataproc region in which to handle the request. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. + """Get the batch workload resource representation. + + :param batch_id: The batch ID. + :param project_id: Google Cloud project ID that the cluster belongs to. + :param region: Cloud Dataproc region to handle the request. + :param retry: A retry object used to retry requests. If *None*, requests + will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. If *retry* is specified, the timeout applies to each + individual attempt. :param metadata: Additional metadata that is provided to the method. """ client = self.get_batch_client(region) @@ -1766,19 +1769,20 @@ async def list_batches( timeout: float | None = None, metadata: Sequence[tuple[str, str]] = (), ): - """ - Lists batch workloads. - - :param project_id: Required. The ID of the Google Cloud project that the cluster belongs to. - :param region: Required. The Cloud Dataproc region in which to handle the request. - :param page_size: Optional. The maximum number of batches to return in each response. The service may - return fewer than this value. The default page size is 20; the maximum page size is 1000. - :param page_token: Optional. A page token received from a previous ``ListBatches`` call. - Provide this token to retrieve the subsequent page. - :param retry: A retry object used to retry requests. If ``None`` is specified, requests will not be - retried. - :param timeout: The amount of time, in seconds, to wait for the request to complete. Note that if - ``retry`` is specified, the timeout applies to each individual attempt. + """List batch workloads. + + :param project_id: Google Cloud project ID that the cluster belongs to. + :param region: Cloud Dataproc region to handle the request. + :param page_size: The maximum number of batches to return in each + response. The service may return fewer than this value. The default + page size is 20; the maximum page size is 1000. + :param page_token: A page token received from a previous ``ListBatches`` + call. Provide this token to retrieve the subsequent page. + :param retry: A retry object used to retry requests. If *None*, requests + will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. If *retry* is specified, the timeout applies to each + individual attempt. :param metadata: Additional metadata that is provided to the method. """ client = self.get_batch_client(region) diff --git a/airflow/providers/google/cloud/hooks/functions.py b/airflow/providers/google/cloud/hooks/functions.py index 0a6a9b892df0..a26c6bf56cf8 100644 --- a/airflow/providers/google/cloud/hooks/functions.py +++ b/airflow/providers/google/cloud/hooks/functions.py @@ -32,8 +32,7 @@ class CloudFunctionsHook(GoogleBaseHook): - """ - Hook for the Google Cloud Functions APIs. + """Google Cloud Functions APIs. All the methods in the hook where project_id is used must be called with keyword arguments rather than positional. 
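As a hedged aside, a rough sketch of driving the methods touched below (``upload_function_zip`` and ``create_new_function``), assuming the usual ``api_version``/``gcp_conn_id`` constructor arguments; the project, location, ZIP path and function body are placeholders, and the body is trimmed to a minimal shape.

.. code-block:: python

    from airflow.providers.google.cloud.hooks.functions import CloudFunctionsHook

    hook = CloudFunctionsHook(api_version="v1", gcp_conn_id="google_cloud_default")

    # project_id-aware methods are called with keyword arguments, as noted above.
    upload_url = hook.upload_function_zip(
        location="europe-west1",
        zip_path="/tmp/function.zip",
        project_id="example-project",
    )
    hook.create_new_function(
        location="europe-west1",
        body={
            "name": "projects/example-project/locations/europe-west1/functions/hello",
            "entryPoint": "hello",
            "runtime": "python311",
            "httpsTrigger": {},
            "sourceUploadUrl": upload_url,
        },
        project_id="example-project",
    )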
@@ -61,19 +60,17 @@ def __init__( @staticmethod def _full_location(project_id: str, location: str) -> str: - """ - Retrieve full location of the function in the form of - ``projects//locations/``. + """Retrieve full location of the function. - :param project_id: The Google Cloud Project project_id where the function belongs. + :param project_id: Google Cloud Project ID where the function belongs. :param location: The location where the function is created. - :return: + :return: The full location, in the form of + ``projects//locations/``. """ return f"projects/{project_id}/locations/{location}" def get_conn(self) -> build: - """ - Retrieves the connection to Cloud Functions. + """Retrieve the connection to Cloud Functions. :return: Google Cloud Function services object. """ @@ -85,74 +82,72 @@ def get_conn(self) -> build: return self._conn def get_function(self, name: str) -> dict: - """ - Returns the Cloud Function with the given name. + """Get the Cloud Function with given name. :param name: Name of the function. :return: A Cloud Functions object representing the function. """ - # fmt: off - return self.get_conn().projects().locations().functions().get( - name=name).execute(num_retries=self.num_retries) - # fmt: on + operation = self.get_conn().projects().locations().functions().get(name=name) + return operation.execute(num_retries=self.num_retries) @GoogleBaseHook.fallback_to_default_project_id def create_new_function(self, location: str, body: dict, project_id: str) -> None: - """ - Creates a new function in Cloud Function in the location specified in the body. + """Create a new function at the location specified in the body. :param location: The location of the function. :param body: The body required by the Cloud Functions insert API. - :param project_id: Optional, Google Cloud Project project_id where the function belongs. - If set to None or missing, the default project_id from the Google Cloud connection is used. - :return: None - """ - # fmt: off - response = self.get_conn().projects().locations().functions().create( - location=self._full_location(project_id, location), - body=body - ).execute(num_retries=self.num_retries) - # fmt: on + :param project_id: Google Cloud Project ID where the function belongs. + If set to None or missing, the default project ID from the Google + Cloud connection is used. + """ + operation = ( + self.get_conn() + .projects() + .locations() + .functions() + .create(location=self._full_location(project_id, location), body=body) + ) + response = operation.execute(num_retries=self.num_retries) operation_name = response["name"] self._wait_for_operation_to_complete(operation_name=operation_name) def update_function(self, name: str, body: dict, update_mask: list[str]) -> None: - """ - Updates Cloud Functions according to the specified update mask. + """Update Cloud Functions according to the specified update mask. :param name: The name of the function. :param body: The body required by the cloud function patch API. :param update_mask: The update mask - array of fields that should be patched. 
- :return: None """ - # fmt: off - response = self.get_conn().projects().locations().functions().patch( - updateMask=",".join(update_mask), - name=name, - body=body - ).execute(num_retries=self.num_retries) - # fmt: on + operation = ( + self.get_conn() + .projects() + .locations() + .functions() + .patch(updateMask=",".join(update_mask), name=name, body=body) + ) + response = operation.execute(num_retries=self.num_retries) operation_name = response["name"] self._wait_for_operation_to_complete(operation_name=operation_name) @GoogleBaseHook.fallback_to_default_project_id def upload_function_zip(self, location: str, zip_path: str, project_id: str) -> str: - """ - Uploads zip file with sources. + """Upload ZIP file with sources. :param location: The location where the function is created. :param zip_path: The path of the valid .zip file to upload. - :param project_id: Optional, Google Cloud Project project_id where the function belongs. - If set to None or missing, the default project_id from the Google Cloud connection is used. + :param project_id: Google Cloud Project ID where the function belongs. + If set to None or missing, the default project ID from the Google + Cloud connection is used. :return: The upload URL that was returned by generateUploadUrl method. """ - # fmt: off - - response = \ - self.get_conn().projects().locations().functions().generateUploadUrl( - parent=self._full_location(project_id, location) - ).execute(num_retries=self.num_retries) - # fmt: on + operation = ( + self.get_conn() + .projects() + .locations() + .functions() + .generateUploadUrl(parent=self._full_location(project_id, location)) + ) + response = operation.execute(num_retries=self.num_retries) upload_url = response.get("uploadUrl") with open(zip_path, "rb") as file: @@ -161,7 +156,6 @@ def upload_function_zip(self, location: str, zip_path: str, project_id: str) -> data=file, # Those two headers needs to be specified according to: # https://cloud.google.com/functions/docs/reference/rest/v1/projects.locations.functions/generateUploadUrl - # nopep8 headers={ "Content-type": "application/zip", "x-goog-content-length-range": "0,104857600", @@ -170,16 +164,12 @@ def upload_function_zip(self, location: str, zip_path: str, project_id: str) -> return upload_url def delete_function(self, name: str) -> None: - """ - Deletes the specified Cloud Function. + """Delete the specified Cloud Function. :param name: The name of the function. - :return: None """ - # fmt: off - response = self.get_conn().projects().locations().functions().delete( - name=name).execute(num_retries=self.num_retries) - # fmt: on + operation = self.get_conn().projects().locations().functions().delete(name=name) + response = operation.execute(num_retries=self.num_retries) operation_name = response["name"] self._wait_for_operation_to_complete(operation_name=operation_name) @@ -191,32 +181,29 @@ def call_function( location: str, project_id: str = PROVIDE_PROJECT_ID, ) -> dict: - """ - Synchronously invokes a deployed Cloud Function. To be used for testing - purposes as very limited traffic is allowed. + """Invoke a deployed Cloud Function. + + This is done synchronously and should only be used for testing purposes, + as very limited traffic is allowed. :param function_id: ID of the function to be called :param input_data: Input to be passed to the function :param location: The location where the function is located. - :param project_id: Optional, Google Cloud Project project_id where the function belongs. 
- If set to None or missing, the default project_id from the Google Cloud connection is used. - :return: None + :param project_id: Google Cloud Project ID where the function belongs. + If set to None or missing, the default project ID from the Google + Cloud connection is used. """ name = f"projects/{project_id}/locations/{location}/functions/{function_id}" - # fmt: off - response = self.get_conn().projects().locations().functions().call( - name=name, - body=input_data - ).execute(num_retries=self.num_retries) - # fmt: on + operation = self.get_conn().projects().locations().functions().call(name=name, body=input_data) + response = operation.execute(num_retries=self.num_retries) if "error" in response: raise AirflowException(response["error"]) return response def _wait_for_operation_to_complete(self, operation_name: str) -> dict: - """ - Waits for the named operation to complete - checks status of the - asynchronous call. + """Wait for the named operation to complete. + + This is used to check the status of an asynchronous call. :param operation_name: The name of the operation. :return: The response returned by the operation. @@ -224,11 +211,8 @@ def _wait_for_operation_to_complete(self, operation_name: str) -> dict: """ service = self.get_conn() while True: - # fmt: off - operation_response = service.operations().get( - name=operation_name, - ).execute(num_retries=self.num_retries) - # fmt: on + operation = service.operations().get(name=operation_name) + operation_response = operation.execute(num_retries=self.num_retries) if operation_response.get("done"): response = operation_response.get("response") error = operation_response.get("error") diff --git a/airflow/providers/google/cloud/hooks/kubernetes_engine.py b/airflow/providers/google/cloud/hooks/kubernetes_engine.py index fd61809561ac..d8b1d92ffae6 100644 --- a/airflow/providers/google/cloud/hooks/kubernetes_engine.py +++ b/airflow/providers/google/cloud/hooks/kubernetes_engine.py @@ -63,8 +63,7 @@ class GKEHook(GoogleBaseHook): - """ - Hook for managing Google Kubernetes Engine cluster APIs. + """Google Kubernetes Engine cluster APIs. All the methods in the hook where project_id is used must be called with keyword arguments rather than positional. @@ -90,7 +89,7 @@ def __init__( self.location = location def get_cluster_manager_client(self) -> ClusterManagerClient: - """Returns ClusterManagerClient.""" + """Create or get a ClusterManagerClient.""" if self._client is None: self._client = ClusterManagerClient(credentials=self.get_credentials(), client_info=CLIENT_INFO) return self._client @@ -114,13 +113,13 @@ def get_client(self) -> ClusterManagerClient: return self.get_conn() def wait_for_operation(self, operation: Operation, project_id: str | None = None) -> Operation: - """ - Given an operation, continuously fetches the status from Google Cloud until either - completion or an error occurring. + """Continuously fetch the status from Google Cloud. - :param operation: The Operation to wait for - :param project_id: Google Cloud project ID - :return: A new, updated operation fetched from Google Cloud + This is done until the given operation completes, or raises an error. + + :param operation: The Operation to wait for. + :param project_id: Google Cloud project ID. + :return: A new, updated operation fetched from Google Cloud. 
""" self.log.info("Waiting for OPERATION_NAME %s", operation.name) time.sleep(OPERATIONAL_POLL_INTERVAL) @@ -134,8 +133,7 @@ def wait_for_operation(self, operation: Operation, project_id: str | None = None return operation def get_operation(self, operation_name: str, project_id: str | None = None) -> Operation: - """ - Fetches the operation from Google Cloud. + """Get an operation from Google Cloud. :param operation_name: Name of operation to fetch :param project_id: Google Cloud project ID @@ -150,8 +148,7 @@ def get_operation(self, operation_name: str, project_id: str | None = None) -> O @staticmethod def _append_label(cluster_proto: Cluster, key: str, val: str) -> Cluster: - """ - Append labels to provided Cluster Protobuf. + """Append labels to provided Cluster Protobuf. Labels must fit the regex ``[a-z]([-a-z0-9]*[a-z0-9])?`` (current airflow version string follows semantic versioning spec: x.y.z). @@ -175,24 +172,23 @@ def delete_cluster( retry: Retry | _MethodDefault = DEFAULT, timeout: float | None = None, ) -> Operation | None: - """ - Deletes the cluster, including the Kubernetes endpoint and all - worker nodes. Firewalls and routes that were configured during - cluster creation are also deleted. Other Google Compute Engine - resources that might be in use by the cluster (e.g. load balancer - resources) will not be deleted if they were not present at the - initial create time. - - :param name: The name of the cluster to delete - :param project_id: Google Cloud project ID - :param wait_to_complete: A boolean value which makes method to sleep while - operation of deletion is not finished. + """Deletes the cluster, the Kubernetes endpoint, and all worker nodes. + + Firewalls and routes that were configured during cluster creation are + also deleted. Other Google Compute Engine resources that might be in use + by the cluster (e.g. load balancer resources) will not be deleted if + they were not present at the initial create time. + + :param name: The name of the cluster to delete. + :param project_id: Google Cloud project ID. + :param wait_to_complete: If *True*, wait until the deletion is finished + before returning. :param retry: Retry object used to determine when/if to retry requests. If None is specified, requests will not be retried. - :param timeout: The amount of time, in seconds, to wait for the request to - complete. Note that if retry is specified, the timeout applies to each - individual attempt. - :return: The full url to the delete operation if successful, else None + :param timeout: The amount of time, in seconds, to wait for the request + to complete. Note that if retry is specified, the timeout applies to + each individual attempt. + :return: The full url to the delete operation if successful, else None. """ self.log.info("Deleting (project_id=%s, location=%s, cluster_id=%s)", project_id, self.location, name) @@ -219,26 +215,27 @@ def create_cluster( retry: Retry | _MethodDefault = DEFAULT, timeout: float | None = None, ) -> Operation | Cluster: - """ - Creates a cluster, consisting of the specified number and type of Google Compute - Engine instances. + """Create a cluster. + + This should consist of the specified number, and the type of Google + Compute Engine instances. :param cluster: A Cluster protobuf or dict. 
If dict is provided, it must be of the same form as the protobuf message - :class:`google.cloud.container_v1.types.Cluster` - :param project_id: Google Cloud project ID - :param wait_to_complete: A boolean value which makes method to sleep while - operation of creation is not finished. + :class:`google.cloud.container_v1.types.Cluster`. + :param project_id: Google Cloud project ID. + :param wait_to_complete: If *True*, wait until the cluster creation is + finished before returning. :param retry: A retry object (``google.api_core.retry.Retry``) used to - retry requests. - If None is specified, requests will not be retried. - :param timeout: The amount of time, in seconds, to wait for the request to - complete. Note that if retry is specified, the timeout applies to each - individual attempt. - :return: The full url to the new, or existing, cluster - :raises: - ParseError: On JSON parsing problems when trying to convert dict - AirflowException: cluster is not dict type nor Cluster proto type + retry requests. If None is specified, requests will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. Note that if retry is specified, the timeout applies to + each individual attempt. + :return: The full url to the new, or existing, cluster. + :raises ParseError: On JSON parsing problems when trying to convert + dict. + :raises AirflowException: If *cluster* is neither a dict nor a Cluster + proto. """ if isinstance(cluster, dict): cluster = Cluster.from_json(json.dumps(cluster)) @@ -273,17 +270,15 @@ def get_cluster( retry: Retry | _MethodDefault = DEFAULT, timeout: float | None = None, ) -> Cluster: - """ - Gets details of specified cluster. + """Get details of specified cluster. - :param name: The name of the cluster to retrieve - :param project_id: Google Cloud project ID - :param retry: A retry object used to retry requests. If None is specified, - requests will not be retried. - :param timeout: The amount of time, in seconds, to wait for the request to - complete. Note that if retry is specified, the timeout applies to each - individual attempt. - :return: google.cloud.container_v1.types.Cluster + :param name: The name of the cluster to retrieve. + :param project_id: Google Cloud project ID. + :param retry: A retry object used to retry requests. If None is + specified, requests will not be retried. + :param timeout: The amount of time, in seconds, to wait for the request + to complete. Note that if retry is specified, the timeout applies to + each individual attempt. """ self.log.info( "Fetching cluster (project_id=%s, location=%s, cluster_name=%s)", @@ -300,7 +295,7 @@ def get_cluster( class GKEAsyncHook(GoogleBaseAsyncHook): - """Hook implemented with usage of asynchronous client of GKE.""" + """Asynchronous client of GKE.""" sync_hook_class = GKEHook @@ -331,8 +326,7 @@ async def get_operation( operation_name: str, project_id: str = PROVIDE_PROJECT_ID, ) -> Operation: - """ - Fetches the operation from Google Cloud. + """Fetch an operation from Google Cloud. :param operation_name: Name of operation to fetch. :param project_id: Google Cloud project ID.
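Purely as an illustration of the GKE hook surface touched here, a small sketch; the connection ID, location, project and cluster values are placeholders, and the standard ``gcp_conn_id``/``location`` constructor arguments are assumed.

.. code-block:: python

    from google.cloud.container_v1.types import Cluster

    from airflow.providers.google.cloud.hooks.kubernetes_engine import GKEHook

    hook = GKEHook(gcp_conn_id="google_cloud_default", location="europe-west1-b")

    # Create, inspect and tear down a throwaway cluster; project_id is always
    # passed as a keyword argument.
    hook.create_cluster(
        cluster=Cluster(name="example-cluster", initial_node_count=1),
        project_id="example-project",
        wait_to_complete=True,
    )
    cluster = hook.get_cluster(name="example-cluster", project_id="example-project")
    print(cluster.status)

    hook.delete_cluster(name="example-cluster", project_id="example-project")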
@@ -348,7 +342,7 @@ async def get_operation( class GKEPodHook(GoogleBaseHook, PodOperatorHookProtocol): - """Hook for managing Google Kubernetes Engine pod APIs.""" + """Google Kubernetes Engine pod APIs.""" def __init__( self, @@ -377,18 +371,16 @@ def get_namespace(self): """Get the namespace configured by the Airflow connection.""" def _get_namespace(self): - """Implemented for compatibility with KubernetesHook. Deprecated; do not use.""" + """For compatibility with KubernetesHook. Deprecated; do not use.""" def get_xcom_sidecar_container_image(self): - """ - Returns the xcom sidecar image defined in the connection. + """Get the xcom sidecar image defined in the connection. Implemented for compatibility with KubernetesHook. """ def get_xcom_sidecar_container_resources(self): - """ - Returns the xcom sidecar resources defined in the connection. + """Get the xcom sidecar resources defined in the connection. Implemented for compatibility with KubernetesHook. """ @@ -419,8 +411,7 @@ def _get_token(creds: google.auth.credentials.Credentials) -> str: return creds.token def get_pod(self, name: str, namespace: str) -> V1Pod: - """ - Gets pod's object. + """Get a pod object. :param name: Name of the pod. :param namespace: Name of the pod's namespace. @@ -432,31 +423,19 @@ def get_pod(self, name: str, namespace: str) -> V1Pod: class GKEPodAsyncHook(GoogleBaseAsyncHook): - """ - Hook for managing Google Kubernetes Engine pods APIs in asynchronous way. + """Google Kubernetes Engine pods APIs asynchronously. :param cluster_url: The URL pointed to the cluster. - :param ssl_ca_cert: SSL certificate that is used for authentication to the pod. + :param ssl_ca_cert: SSL certificate used for authentication to the pod. """ sync_hook_class = GKEPodHook scopes = ["https://www.googleapis.com/auth/cloud-platform"] - def __init__( - self, - cluster_url: str, - ssl_ca_cert: str, - **kwargs, - ): - + def __init__(self, cluster_url: str, ssl_ca_cert: str, **kwargs) -> None: self._cluster_url = cluster_url self._ssl_ca_cert = ssl_ca_cert - - kwargs.update( - cluster_url=cluster_url, - ssl_ca_cert=ssl_ca_cert, - ) - super().__init__(**kwargs) + super().__init__(cluster_url=cluster_url, ssl_ca_cert=ssl_ca_cert, **kwargs) @contextlib.asynccontextmanager async def get_conn(self, token: Token) -> async_client.ApiClient: # type: ignore[override] @@ -490,8 +469,7 @@ def _get_config(self) -> async_client.configuration.Configuration: return configuration async def get_pod(self, name: str, namespace: str) -> V1Pod: - """ - Gets pod's object. + """Get a pod object. :param name: Name of the pod. :param namespace: Name of the pod's namespace. @@ -506,8 +484,7 @@ async def get_pod(self, name: str, namespace: str) -> V1Pod: return pod async def delete_pod(self, name: str, namespace: str): - """ - Deletes pod's object. + """Delete a pod. :param name: Name of the pod. :param namespace: Name of the pod's namespace. @@ -527,10 +504,12 @@ async def delete_pod(self, name: str, namespace: str): raise async def read_logs(self, name: str, namespace: str): - """ - Reads logs inside the pod while starting containers inside. All the logs will be outputted with its - timestamp to track the logs after the execution of the pod is completed. The method is used for async - output of the logs only in the pod failed it execution or the task was cancelled by the user. + """Read logs inside the pod while starting containers inside. + + All the logs will be outputted with its timestamp to track the logs + after the execution of the pod is completed. 
The method is used for + async output of the logs only if the pod failed its execution or the task + was cancelled by the user. :param name: Name of the pod. :param namespace: Name of the pod's namespace. diff --git a/airflow/providers/google/cloud/links/datafusion.py b/airflow/providers/google/cloud/links/datafusion.py index 2410ec8feaca..00afd700bdb2 100644 --- a/airflow/providers/google/cloud/links/datafusion.py +++ b/airflow/providers/google/cloud/links/datafusion.py @@ -35,9 +35,10 @@ class BaseGoogleLink(BaseOperatorLink): - """ - Override the base logic to prevent adding 'https://console.cloud.google.com' - in front of every link where uri is used. + """Link for Google operators. + + Prevent adding ``https://console.cloud.google.com`` in front of every link + where URI is used. """ name: ClassVar[str] diff --git a/airflow/providers/google/cloud/operators/bigquery.py b/airflow/providers/google/cloud/operators/bigquery.py index d509b9f4f712..98929b6e6dec 100644 --- a/airflow/providers/google/cloud/operators/bigquery.py +++ b/airflow/providers/google/cloud/operators/bigquery.py @@ -133,17 +133,17 @@ def get_db_hook(self: BigQueryCheckOperator) -> BigQueryHook: # type:ignore[mis class BigQueryCheckOperator(_BigQueryDbHookMixin, SQLCheckOperator): - """ - Performs checks against BigQuery. The ``BigQueryCheckOperator`` expects - a sql query that will return a single row. Each value on that - first row is evaluated using python ``bool`` casting. If any of the - values return ``False`` the check is failed and errors out. + """Performs checks against BigQuery. + + This operator expects a SQL query that returns a single row. Each value on + that row is evaluated using a Python ``bool`` cast. If any of the values + is falsy, the check errors out. .. seealso:: For more information on how to use this operator, take a look at the guide: :ref:`howto/operator:BigQueryCheckOperator` - Note that Python bool casting evals the following as ``False``: + Note that Python bool casting evals the following as *False*: * ``False`` * ``0`` @@ -152,36 +152,34 @@ class BigQueryCheckOperator(_BigQueryDbHookMixin, SQLCheckOperator): * Empty dictionary or set (``{}``) Given a query like ``SELECT COUNT(*) FROM foo``, it will fail only if - the count ``== 0``. You can craft much more complex query that could, - for instance, check that the table has the same number of rows as - the source table upstream, or that the count of today's partition is - greater than yesterday's partition, or that a set of metrics are less - than 3 standard deviation for the 7 day average. - - This operator can be used as a data quality check in your pipeline, and - depending on where you put it in your DAG, you have the choice to - stop the critical path, preventing from - publishing dubious data, or on the side and receive email alerts - without stopping the progress of the DAG. - - :param sql: the sql to be executed - :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. - :param use_legacy_sql: Whether to use legacy SQL (true) - or standard SQL (false). + the count equals zero. You can craft a much more complex query that could, + for instance, check that the table has the same number of rows as the source + table upstream, or that the count of today's partition is greater than + yesterday's partition, or that a set of metrics are less than three standard + deviations for the 7-day average. + + This operator can be used as a data quality check in your pipeline.
+ Depending on where you put it in your DAG, you have the choice to stop the + critical path, preventing from publishing dubious data, or on the side and + receive email alerts without stopping the progress of the DAG. + + :param sql: SQL to execute. + :param gcp_conn_id: Connection ID for Google Cloud. + :param use_legacy_sql: Whether to use legacy SQL (true) or standard SQL (false). :param location: The geographic location of the job. See details at: https://cloud.google.com/bigquery/docs/locations#specifying_your_location - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). - :param labels: a dictionary containing labels for the table, passed to BigQuery - :param deferrable: Run operator in the deferrable mode - :param poll_interval: (Deferrable mode only) polling period in seconds to check for the status of job. - Defaults to 4 seconds. + :param impersonation_chain: Optional service account to impersonate using + short-term credentials, or chained list of accounts required to get the + access token of the last account in the list, which will be impersonated + in the request. If set as a string, the account must grant the + originating account the Service Account Token Creator IAM role. If set + as a sequence, the identities from the list must grant Service Account + Token Creator IAM role to the directly preceding identity, with first + account from the list granting this role to the originating account. (templated) + :param labels: a dictionary containing labels for the table, passed to BigQuery. + :param deferrable: Run operator in the deferrable mode. + :param poll_interval: (Deferrable mode only) polling period in seconds to + check for the status of job. """ template_fields: Sequence[str] = ( @@ -255,10 +253,10 @@ def execute(self, context: Context): self.log.info("Current state of job %s is %s", job.job_id, job.state) def execute_complete(self, context: Context, event: dict[str, Any]) -> None: - """ - Callback for when the trigger fires - returns immediately. - Relies on trigger to throw an exception, otherwise it assumes execution was - successful. + """Callback for when the trigger fires. + + This returns immediately. It relies on trigger to throw an exception, + otherwise it assumes execution was successful. """ if event["status"] == "error": raise AirflowException(event["message"]) @@ -275,31 +273,30 @@ def execute_complete(self, context: Context, event: dict[str, Any]) -> None: class BigQueryValueCheckOperator(_BigQueryDbHookMixin, SQLValueCheckOperator): - """ - Performs a simple value check using sql code. + """Perform a simple value check using sql code. .. seealso:: For more information on how to use this operator, take a look at the guide: :ref:`howto/operator:BigQueryValueCheckOperator` - :param sql: the sql to be executed + :param sql: SQL to execute. :param use_legacy_sql: Whether to use legacy SQL (true) or standard SQL (false). :param gcp_conn_id: (Optional) The connection ID used to connect to Google Cloud. 
:param location: The geographic location of the job. See details at: https://cloud.google.com/bigquery/docs/locations#specifying_your_location - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first - account from the list granting this role to the originating account (templated). - :param labels: a dictionary containing labels for the table, passed to BigQuery - :param deferrable: Run operator in the deferrable mode - :param poll_interval: (Deferrable mode only) polling period in seconds to check for the status of job. - Defaults to 4 seconds. + :param impersonation_chain: Optional service account to impersonate using + short-term credentials, or chained list of accounts required to get the + access token of the last account in the list, which will be impersonated + in the request. If set as a string, the account must grant the + originating account the Service Account Token Creator IAM role. If set + as a sequence, the identities from the list must grant Service Account + Token Creator IAM role to the directly preceding identity, with first + account from the list granting this role to the originating account. (templated) + :param labels: a dictionary containing labels for the table, passed to BigQuery. + :param deferrable: Run operator in the deferrable mode. + :param poll_interval: (Deferrable mode only) polling period in seconds to + check for the status of job. """ template_fields: Sequence[str] = ( @@ -388,10 +385,10 @@ def _handle_job_error(job: BigQueryJob | UnknownJob) -> None: raise AirflowException(f"BigQuery job {job.job_id} failed: {job.error_result}") def execute_complete(self, context: Context, event: dict[str, Any]) -> None: - """ - Callback for when the trigger fires - returns immediately. - Relies on trigger to throw an exception, otherwise it assumes execution was - successful. + """Callback for when the trigger fires. + + This returns immediately. It relies on trigger to throw an exception, + otherwise it assumes execution was successful. """ if event["status"] == "error": raise AirflowException(event["message"]) @@ -531,10 +528,10 @@ def execute(self, context: Context): ) def execute_complete(self, context: Context, event: dict[str, Any]) -> None: - """ - Callback for when the trigger fires - returns immediately. - Relies on trigger to throw an exception, otherwise it assumes execution was - successful. + """Callback for when the trigger fires. + + This returns immediately. It relies on trigger to throw an exception, + otherwise it assumes execution was successful. """ if event["status"] == "error": raise AirflowException(event["message"]) @@ -798,7 +795,7 @@ class BigQueryGetDataOperator(GoogleCloudBaseOperator): ``[A,B,C]`` and you pass 'B,A' in the ``selected_fields`` the data would still be of the form ``'A,B'``. - **Example**: :: + **Example**:: get_data = BigQueryGetDataOperator( task_id='get_data_from_bq', @@ -895,12 +892,7 @@ def _submit_job( ) def generate_query(self, hook: BigQueryHook) -> str: - """ - Generate a select query if selected fields are given or with * - for the given dataset and table id. 
- - :param hook BigQuery Hook - """ + """Generate a SELECT query for the given dataset and table, using ``selected_fields`` if given or ``*`` otherwise.""" query = "select " if self.selected_fields: query += self.selected_fields @@ -976,10 +968,10 @@ def execute(self, context: Context): ) def execute_complete(self, context: Context, event: dict[str, Any]) -> Any: - """ - Callback for when the trigger fires - returns immediately. - Relies on trigger to throw an exception, otherwise it assumes execution was - successful. + """Callback for when the trigger fires. + + This returns immediately. It relies on trigger to throw an exception, + otherwise it assumes execution was successful. """ if event["status"] == "error": raise AirflowException(event["message"]) @@ -989,12 +981,13 @@ def execute_complete(self, context: Context, event: dict[str, Any]) -> Any: class BigQueryExecuteQueryOperator(GoogleCloudBaseOperator): - """ - Executes BigQuery SQL queries in a specific BigQuery database. - This operator does not assert idempotency. + """Executes BigQuery SQL queries in a specific BigQuery database. + + This operator is deprecated. Please use + :class:`airflow.providers.google.cloud.operators.bigquery.BigQueryInsertJobOperator` + instead. - This operator is deprecated. - Please use :class:`airflow.providers.google.cloud.operators.bigquery.BigQueryInsertJobOperator` + This operator does not assert idempotency. :param sql: the SQL code to be executed as a single string, or a list of str (sql statements), or a reference to a template file. @@ -1050,10 +1043,11 @@ class BigQueryExecuteQueryOperator(GoogleCloudBaseOperator): US and EU. See details at https://cloud.google.com/bigquery/docs/locations#specifying_your_location :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys). - **Example**: :: + + .. code-block:: python encryption_configuration = { - "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key" + "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key", } :param impersonation_chain: Optional service account to impersonate using short-term credentials, or chained list of accounts required to get the access_token @@ -1207,9 +1201,7 @@ def on_kill(self) -> None: class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator): - """ - Creates a new, empty table in the specified BigQuery dataset, - optionally with schema. + """Creates a new table in the specified BigQuery dataset, optionally with schema. The schema to be used for the BigQuery table may be specified in one of two ways. You may either directly pass the schema fields in, or you may @@ -1230,7 +1222,7 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator): :param schema_fields: If set, the schema field list as defined here: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema - **Example**: :: + **Example**:: schema_fields=[{"name": "emp_name", "type": "STRING", "mode": "REQUIRED"}, {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"}] @@ -1249,45 +1241,46 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator): and interact with the Google Cloud Storage service.
:param labels: a dictionary containing labels for the table, passed to BigQuery - **Example (with schema JSON in GCS)**: :: + **Example (with schema JSON in GCS)**:: - CreateTable = BigQueryCreateEmptyTableOperator( - task_id='BigQueryCreateEmptyTableOperator_task', - dataset_id='ODS', - table_id='Employees', - project_id='internal-gcp-project', - gcs_schema_object='gs://schema-bucket/employee_schema.json', - gcp_conn_id='airflow-conn-id', - google_cloud_storage_conn_id='airflow-conn-id' - ) + CreateTable = BigQueryCreateEmptyTableOperator( + task_id='BigQueryCreateEmptyTableOperator_task', + dataset_id='ODS', + table_id='Employees', + project_id='internal-gcp-project', + gcs_schema_object='gs://schema-bucket/employee_schema.json', + gcp_conn_id='airflow-conn-id', + google_cloud_storage_conn_id='airflow-conn-id' + ) - **Corresponding Schema file** (``employee_schema.json``): :: + **Corresponding Schema file** (``employee_schema.json``):: - [ - { - "mode": "NULLABLE", - "name": "emp_name", - "type": "STRING" - }, - { - "mode": "REQUIRED", - "name": "salary", - "type": "INTEGER" - } - ] + [ + { + "mode": "NULLABLE", + "name": "emp_name", + "type": "STRING" + }, + { + "mode": "REQUIRED", + "name": "salary", + "type": "INTEGER" + } + ] + + **Example (with schema in the DAG)**:: + + CreateTable = BigQueryCreateEmptyTableOperator( + task_id='BigQueryCreateEmptyTableOperator_task', + dataset_id='ODS', + table_id='Employees', + project_id='internal-gcp-project', + schema_fields=[{"name": "emp_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"}], + gcp_conn_id='airflow-conn-id-account', + google_cloud_storage_conn_id='airflow-conn-id' + ) - **Example (with schema in the DAG)**: :: - - CreateTable = BigQueryCreateEmptyTableOperator( - task_id='BigQueryCreateEmptyTableOperator_task', - dataset_id='ODS', - table_id='Employees', - project_id='internal-gcp-project', - schema_fields=[{"name": "emp_name", "type": "STRING", "mode": "REQUIRED"}, - {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"}], - gcp_conn_id='airflow-conn-id-account', - google_cloud_storage_conn_id='airflow-conn-id' - ) :param view: [Optional] A dictionary containing definition for the view. If set, it will create a view instead of a table: @@ -1295,10 +1288,11 @@ class BigQueryCreateEmptyTableOperator(GoogleCloudBaseOperator): https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ViewDefinition :param materialized_view: [Optional] The materialized view definition. :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys). - **Example**: :: + + .. code-block:: python encryption_configuration = { - "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key" + "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key", } :param location: The location used for the operation. :param cluster_fields: [Optional] The fields used for clustering. @@ -1459,9 +1453,7 @@ def execute(self, context: Context) -> None: class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator): - """ - Creates a new external table in the dataset with the data from Google Cloud - Storage. + """Create a new external table with data from Google Cloud Storage. The schema to be used for the BigQuery table may be specified in one of two ways. 
You may either directly pass the schema fields in, or you may @@ -1481,7 +1473,7 @@ class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator): :param schema_fields: If set, the schema field list as defined here: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs#configuration.load.schema - **Example**: :: + **Example**:: schema_fields=[{"name": "emp_name", "type": "STRING", "mode": "REQUIRED"}, {"name": "salary", "type": "INTEGER", "mode": "NULLABLE"}] @@ -1521,10 +1513,11 @@ class BigQueryCreateExternalTableOperator(GoogleCloudBaseOperator): :param src_fmt_configs: configure optional fields specific to the source format :param labels: a dictionary containing labels for the table, passed to BigQuery :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys). - **Example**: :: + + .. code-block:: python encryption_configuration = { - "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key" + "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key", } :param location: The location used for the operation. :param impersonation_chain: Optional service account to impersonate using short-term @@ -1748,8 +1741,7 @@ def execute(self, context: Context) -> None: class BigQueryDeleteDatasetOperator(GoogleCloudBaseOperator): - """ - This operator deletes an existing dataset from your Project in Big query. + """Delete an existing dataset from your Project in BigQuery. https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/delete @@ -1773,7 +1765,7 @@ class BigQueryDeleteDatasetOperator(GoogleCloudBaseOperator): Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - **Example**: :: + **Example**:: delete_temp_data = BigQueryDeleteDatasetOperator( dataset_id='temp-dataset', @@ -1823,8 +1815,7 @@ def execute(self, context: Context) -> None: class BigQueryCreateEmptyDatasetOperator(GoogleCloudBaseOperator): - """ - This operator is used to create new dataset for your Project in BigQuery. + """Create a new dataset for your Project in BigQuery. https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets#resource @@ -1850,7 +1841,7 @@ class BigQueryCreateEmptyDatasetOperator(GoogleCloudBaseOperator): :param if_exists: What should Airflow do if the dataset exists. If set to `log`, the TI will be passed to success and an error message will be logged. Set to `ignore` to ignore the error, set to `fail` to fail the TI, and set to `skip` to skip it. - **Example**: :: + **Example**:: create_new_dataset = BigQueryCreateEmptyDatasetOperator( dataset_id='new-dataset', @@ -1947,8 +1938,7 @@ def execute(self, context: Context) -> None: class BigQueryGetDatasetOperator(GoogleCloudBaseOperator): - """ - This operator is used to return the dataset specified by dataset_id. + """Get the dataset specified by ID. .. seealso:: For more information on how to use this operator, take a look at the guide: @@ -2012,8 +2002,7 @@ def execute(self, context: Context): class BigQueryGetDatasetTablesOperator(GoogleCloudBaseOperator): - """ - This operator retrieves the list of tables in the specified dataset. + """Retrieve the list of tables in the specified dataset. .. 
seealso:: For more information on how to use this operator, take a look at the guide: @@ -2072,12 +2061,13 @@ def execute(self, context: Context): class BigQueryPatchDatasetOperator(GoogleCloudBaseOperator): - """ - This operator is used to patch dataset for your Project in BigQuery. - It only replaces fields that are provided in the submitted dataset resource. + """Patch a dataset for your Project in BigQuery. + + This operator is deprecated. Please use + :class:`airflow.providers.google.cloud.operators.bigquery.BigQueryUpdateTableOperator` + instead. - This operator is deprecated. - Please use :class:`airflow.providers.google.cloud.operators.bigquery.BigQueryUpdateTableOperator` + Only replaces fields that are provided in the submitted dataset resource. :param dataset_id: The id of dataset. Don't need to provide, if datasetId in dataset_reference. @@ -2140,8 +2130,8 @@ def execute(self, context: Context): class BigQueryUpdateTableOperator(GoogleCloudBaseOperator): - """ - This operator is used to update table for your Project in BigQuery. + """Update a table for your Project in BigQuery. + Use ``fields`` to specify which fields of table to update. If a field is listed in ``fields`` and is ``None`` in table, it will be deleted. @@ -2227,8 +2217,8 @@ def execute(self, context: Context): class BigQueryUpdateDatasetOperator(GoogleCloudBaseOperator): - """ - This operator is used to update dataset for your Project in BigQuery. + """Update a dataset for your Project in BigQuery. + Use ``fields`` to specify which fields of dataset to update. If a field is listed in ``fields`` and is ``None`` in dataset, it will be deleted. If no ``fields`` are provided then all fields of provided ``dataset_resource`` @@ -2309,8 +2299,7 @@ def execute(self, context: Context): class BigQueryDeleteTableOperator(GoogleCloudBaseOperator): - """ - Deletes BigQuery tables. + """Delete a BigQuery table. .. seealso:: For more information on how to use this operator, take a look at the guide: @@ -2368,8 +2357,7 @@ def execute(self, context: Context) -> None: class BigQueryUpsertTableOperator(GoogleCloudBaseOperator): - """ - Upsert BigQuery table. + """Upsert to a BigQuery table. .. seealso:: For more information on how to use this operator, take a look at the guide: @@ -2446,8 +2434,8 @@ def execute(self, context: Context) -> None: class BigQueryUpdateTableSchemaOperator(GoogleCloudBaseOperator): - """ - Update BigQuery Table Schema + """Update BigQuery Table Schema. + Updates fields on a table schema based on contents of the supplied schema_fields_updates parameter. The supplied schema does not need to be complete, if the field already exists in the schema you only need to supply keys & values for the @@ -2460,16 +2448,22 @@ class BigQueryUpdateTableSchemaOperator(GoogleCloudBaseOperator): :param schema_fields_updates: a partial schema resource. see https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableSchema - **Example**: :: + .. 
code-block:: python - schema_fields_updates=[ - {"name": "emp_name", "description": "Some New Description"}, - {"name": "salary", "policyTags": {'names': ['some_new_policy_tag']},}, - {"name": "departments", "fields": [ - {"name": "name", "description": "Some New Description"}, - {"name": "type", "description": "Some New Description"} - ]}, - ] + schema_fields_updates = [ + {"name": "emp_name", "description": "Some New Description"}, + { + "name": "salary", + "policyTags": {"names": ["some_new_policy_tag"]}, + }, + { + "name": "departments", + "fields": [ + {"name": "name", "description": "Some New Description"}, + {"name": "type", "description": "Some New Description"}, + ], + }, + ] :param include_policy_tags: (Optional) If set to True policy tags will be included in the update request which requires special permissions even if unchanged (default False) @@ -2549,9 +2543,9 @@ def execute(self, context: Context): class BigQueryInsertJobOperator(GoogleCloudBaseOperator): - """ - Executes a BigQuery job. Waits for the job to complete and returns job id. + """Execute a BigQuery job. + Waits for the job to complete and returns job id. This operator work in the following way: - it calculates a unique hash of the job using job's configuration or uuid if ``force_rerun`` is True @@ -2767,10 +2761,10 @@ def execute(self, context: Any): self._handle_job_error(job) def execute_complete(self, context: Context, event: dict[str, Any]): - """ - Callback for when the trigger fires - returns immediately. - Relies on trigger to throw an exception, otherwise it assumes execution was - successful. + """Callback for when the trigger fires. + + This returns immediately. It relies on trigger to throw an exception, + otherwise it assumes execution was successful. """ if event["status"] == "error": raise AirflowException(event["message"]) diff --git a/airflow/providers/google/cloud/operators/cloud_sql.py b/airflow/providers/google/cloud/operators/cloud_sql.py index aa8e7f526eeb..20a254b954ff 100644 --- a/airflow/providers/google/cloud/operators/cloud_sql.py +++ b/airflow/providers/google/cloud/operators/cloud_sql.py @@ -215,8 +215,7 @@ class CloudSQLBaseOperator(GoogleCloudBaseOperator): - """ - Abstract base operator for Google Cloud SQL operators to inherit from. + """Abstract base operator for Google Cloud SQL operators. :param instance: Cloud SQL instance ID. This does not include the project ID. :param project_id: Optional, Google Cloud Project ID. f set to None or missing, @@ -284,8 +283,8 @@ def _get_settings_version(instance): class CloudSQLCreateInstanceOperator(CloudSQLBaseOperator): - """ - Creates a new Cloud SQL instance. + """Create a new Cloud SQL instance. + If an instance with the same name exists, no action will be taken and the operator will succeed. @@ -385,8 +384,7 @@ def execute(self, context: Context) -> None: class CloudSQLInstancePatchOperator(CloudSQLBaseOperator): - """ - Updates settings of a Cloud SQL instance. + """Update settings of a Cloud SQL instance. Caution: This is a partial update, so only included values for the settings will be updated. @@ -478,8 +476,7 @@ def execute(self, context: Context): class CloudSQLDeleteInstanceOperator(CloudSQLBaseOperator): - """ - Deletes a Cloud SQL instance. + """Delete a Cloud SQL instance. .. 
seealso:: For more information on how to use this operator, take a look at the guide: @@ -525,8 +522,7 @@ def execute(self, context: Context) -> bool | None: class CloudSQLCloneInstanceOperator(CloudSQLBaseOperator): - """ - Clones an instance to a target instance. + """Clone an instance to a target instance. .. seealso:: For more information on how to use this operator, take a look at the guide: @@ -617,8 +613,7 @@ def execute(self, context: Context): class CloudSQLCreateInstanceDatabaseOperator(CloudSQLBaseOperator): - """ - Creates a new database inside a Cloud SQL instance. + """Create a new database inside a Cloud SQL instance. .. seealso:: For more information on how to use this operator, take a look at the guide: @@ -722,9 +717,7 @@ def execute(self, context: Context) -> bool | None: class CloudSQLPatchInstanceDatabaseOperator(CloudSQLBaseOperator): - """ - Updates a resource containing information about a database inside a Cloud SQL - instance using patch semantics. + """Update resource containing information about a database using patch semantics. See: https://cloud.google.com/sql/docs/mysql/admin-api/how-tos/performance#patch @@ -827,8 +820,7 @@ def execute(self, context: Context) -> None: class CloudSQLDeleteInstanceDatabaseOperator(CloudSQLBaseOperator): - """ - Deletes a database from a Cloud SQL instance. + """Delete a database from a Cloud SQL instance. .. seealso:: For more information on how to use this operator, take a look at the guide: @@ -907,9 +899,9 @@ def execute(self, context: Context) -> bool | None: class CloudSQLExportInstanceOperator(CloudSQLBaseOperator): - """ - Exports data from a Cloud SQL instance to a Cloud Storage bucket as a SQL dump - or CSV file. + """Export data from a Cloud SQL instance to a Cloud Storage bucket. + + The exported format can be a SQL dump or CSV file. Note: This operator is idempotent. If executed multiple times with the same export file URI, the export file in GCS will simply be overridden. @@ -1006,17 +998,18 @@ def execute(self, context: Context) -> None: class CloudSQLImportInstanceOperator(CloudSQLBaseOperator): - """ - Imports data into a Cloud SQL instance from a SQL dump or CSV file in Cloud Storage. + """Import data into a Cloud SQL instance from Cloud Storage. - CSV IMPORT: + CSV IMPORT + `````````` This operator is NOT idempotent for a CSV import. If the same file is imported multiple times, the imported data will be duplicated in the database. Moreover, if there are any unique constraints the duplicate import may result in an error. - SQL IMPORT: + SQL IMPORT + `````````` This operator is idempotent for a SQL import if it was also exported by Cloud SQL. The exported SQL contains 'DROP TABLE IF EXISTS' statements for all tables @@ -1117,9 +1110,10 @@ def execute(self, context: Context) -> None: class CloudSQLExecuteQueryOperator(GoogleCloudBaseOperator): - """ - Performs DML or DDL query on an existing Cloud Sql instance. It optionally uses - cloud-sql-proxy to establish secure connection with the database. + """Perform DML or DDL query on an existing Cloud Sql instance. + + It optionally uses cloud-sql-proxy to establish secure connection with the + database. .. 
seealso:: For more information on how to use this operator, take a look at the guide: diff --git a/airflow/providers/google/cloud/operators/dataflow.py b/airflow/providers/google/cloud/operators/dataflow.py index ef9333db3870..5ae1115a34ae 100644 --- a/airflow/providers/google/cloud/operators/dataflow.py +++ b/airflow/providers/google/cloud/operators/dataflow.py @@ -172,7 +172,9 @@ class DataflowCreateJavaJobOperator(GoogleCloudBaseOperator): This class is deprecated. Please use `providers.apache.beam.operators.beam.BeamRunJavaPipelineOperator`. - **Example**: :: + Example usage: + + .. code-block:: python default_args = { "owner": "airflow", diff --git a/airflow/providers/google/cloud/operators/dataform.py b/airflow/providers/google/cloud/operators/dataform.py index 4b61f974c9e8..005103e44195 100644 --- a/airflow/providers/google/cloud/operators/dataform.py +++ b/airflow/providers/google/cloud/operators/dataform.py @@ -1029,8 +1029,9 @@ def execute(self, context: Context) -> None: class DataformInstallNpmPackagesOperator(GoogleCloudBaseOperator): - """ - Installs npm dependencies in the provided workspace. Requires "package.json" to be created in workspace. + """Install NPM dependencies in the provided workspace. + + Requires "package.json" to be created in the workspace. :param project_id: Required. The ID of the Google Cloud project where workspace located. :param region: Required. The ID of the Google Cloud region where workspace located. diff --git a/airflow/providers/google/cloud/operators/dataproc.py b/airflow/providers/google/cloud/operators/dataproc.py index 0e6d3bd1c052..ff9a58cc34f8 100644 --- a/airflow/providers/google/cloud/operators/dataproc.py +++ b/airflow/providers/google/cloud/operators/dataproc.py @@ -64,8 +64,7 @@ class ClusterGenerator: - """ - Create a new Dataproc Cluster. + """Create a new Dataproc Cluster. :param cluster_name: The name of the DataProc cluster to create. (templated) :param project_id: The ID of the google cloud project in which @@ -395,9 +394,11 @@ def make(self): class DataprocCreateClusterOperator(GoogleCloudBaseOperator): - """ - Create a new cluster on Google Cloud Dataproc. The operator will wait until the - creation is successful or an error occurs in the creation process. + """Create a new cluster on Google Cloud Dataproc. + + The operator will wait until the creation is successful or an error occurs + in the creation process. + If the cluster already exists and ``use_if_exists`` is True then the operator will: - if cluster state is ERROR then delete it if specified and raise error @@ -668,20 +670,22 @@ def execute_complete(self, context: Context, event: dict[str, Any]) -> Any: class DataprocScaleClusterOperator(GoogleCloudBaseOperator): - """ - Scale, up or down, a cluster on Google Cloud Dataproc. + """Scale, up or down, a cluster on Google Cloud Dataproc. + The operator will wait until the cluster is re-scaled. - **Example**: :: + Example usage: + + .. code-block:: python t1 = DataprocClusterScaleOperator( - task_id='dataproc_scale', - project_id='my-project', - cluster_name='cluster-1', - num_workers=10, - num_preemptible_workers=10, - graceful_decommission_timeout='1h', - dag=dag) + task_id="dataproc_scale", + project_id="my-project", + cluster_name="cluster-1", + num_workers=10, + num_preemptible_workers=10, + graceful_decommission_timeout="1h", + ) ..
seealso:: For more detail on about scaling clusters have a look at the reference: @@ -804,8 +808,7 @@ def execute(self, context: Context) -> None: class DataprocDeleteClusterOperator(GoogleCloudBaseOperator): - """ - Deletes a cluster in a project. + """Delete a cluster in a project. :param region: Required. The Cloud Dataproc region in which to handle the request (templated). :param cluster_name: Required. The cluster name (templated). @@ -917,8 +920,7 @@ def _delete_cluster(self, hook: DataprocHook): class DataprocJobBaseOperator(GoogleCloudBaseOperator): - """ - The base class for operators that launch job on DataProc. + """Base class for operators that launch job on DataProc. :param region: The specified region where the dataproc cluster is created. :param job_name: The job name used in the DataProc cluster. This name by default @@ -1095,9 +1097,9 @@ def on_kill(self) -> None: class DataprocSubmitPigJobOperator(DataprocJobBaseOperator): - """ - Start a Pig query Job on a Cloud DataProc cluster. The parameters of the operation - will be passed to the cluster. + """Start a Pig query Job on a Cloud DataProc cluster. + + The parameters of the operation will be passed to the cluster. It's a good practice to define dataproc_* parameters in the default_args of the dag like the cluster name and UDFs. @@ -1116,13 +1118,13 @@ class DataprocSubmitPigJobOperator(DataprocJobBaseOperator): variables for the pig script to be resolved on the cluster or use the parameters to be resolved in the script as template parameters. - **Example**: :: + .. code-block:: python t1 = DataProcPigOperator( - task_id='dataproc_pig', - query='a_pig_script.pig', - variables={'out': 'gs://example/output/{{ds}}'}, - dag=dag) + task_id="dataproc_pig", + query="a_pig_script.pig", + variables={"out": "gs://example/output/{{ds}}"}, + ) .. seealso:: For more detail on about job submission have a look at the reference: @@ -1203,8 +1205,7 @@ def execute(self, context: Context): class DataprocSubmitHiveJobOperator(DataprocJobBaseOperator): - """ - Start a Hive query Job on a Cloud DataProc cluster. + """Start a Hive query Job on a Cloud DataProc cluster. :param query: The query or reference to the query file (q extension). :param query_uri: The HCFS URI of the script that contains the Hive queries. @@ -1278,8 +1279,7 @@ def execute(self, context: Context): class DataprocSubmitSparkSqlJobOperator(DataprocJobBaseOperator): - """ - Start a Spark SQL query Job on a Cloud DataProc cluster. + """Start a Spark SQL query Job on a Cloud DataProc cluster. :param query: The query or reference to the query file (q extension). (templated) :param query_uri: The HCFS URI of the script that contains the SQL queries. @@ -1352,8 +1352,7 @@ def execute(self, context: Context): class DataprocSubmitSparkJobOperator(DataprocJobBaseOperator): - """ - Start a Spark Job on a Cloud DataProc cluster. + """Start a Spark Job on a Cloud DataProc cluster. :param main_jar: The HCFS URI of the jar file that contains the main class (use this or the main_class, not both together). @@ -1426,8 +1425,7 @@ def execute(self, context: Context): class DataprocSubmitHadoopJobOperator(DataprocJobBaseOperator): - """ - Start a Hadoop Job on a Cloud DataProc cluster. + """Start a Hadoop Job on a Cloud DataProc cluster. :param main_jar: The HCFS URI of the jar file containing the main class (use this or the main_class, not both together). 
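As an illustration of the Hadoop job operator documented here, a hedged sketch in the same style as the Pig example above; ``arguments`` and ``cluster_name`` are assumed from the common Dataproc job operator signature, and the jar path and bucket URIs are placeholders.

.. code-block:: python

    wordcount = DataprocSubmitHadoopJobOperator(
        task_id="hadoop_wordcount",
        main_jar="file:///usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar",
        arguments=["wordcount", "gs://example-bucket/input/", "gs://example-bucket/output/"],
        cluster_name="cluster-1",
        region="europe-west1",
        job_name="hadoop_wordcount",
    )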
@@ -1478,8 +1476,7 @@ def __init__( self.files = files def generate_job(self): - """ - Helper method for easier migration to `DataprocSubmitJobOperator`. + """Helper method for easier migration to `DataprocSubmitJobOperator`. :return: Dict representing Dataproc job """ @@ -1500,8 +1497,7 @@ def execute(self, context: Context): class DataprocSubmitPySparkJobOperator(DataprocJobBaseOperator): - """ - Start a PySpark Job on a Cloud DataProc cluster. + """Start a PySpark Job on a Cloud DataProc cluster. :param main: [Required] The Hadoop Compatible Filesystem (HCFS) URI of the main Python file to use as the driver. Must be a .py file. (templated) @@ -1577,8 +1573,7 @@ def __init__( self.pyfiles = pyfiles def generate_job(self): - """ - Helper method for easier migration to `DataprocSubmitJobOperator`. + """Helper method for easier migration to :class:`DataprocSubmitJobOperator`. :return: Dict representing Dataproc job """ @@ -1617,8 +1612,7 @@ def execute(self, context: Context): class DataprocCreateWorkflowTemplateOperator(GoogleCloudBaseOperator): - """ - Creates new workflow template. + """Creates new workflow template. :param project_id: Optional. The ID of the Google Cloud project the cluster belongs to. :param region: Required. The Cloud Dataproc region in which to handle the request. @@ -1682,9 +1676,9 @@ def execute(self, context: Context): class DataprocInstantiateWorkflowTemplateOperator(GoogleCloudBaseOperator): - """ - Instantiate a WorkflowTemplate on Google Cloud Dataproc. The operator will wait - until the WorkflowTemplate is finished executing. + """Instantiate a WorkflowTemplate on Google Cloud Dataproc. + + The operator will wait until the WorkflowTemplate is finished executing. .. seealso:: Please refer to: @@ -1796,10 +1790,10 @@ def execute(self, context: Context): ) def execute_complete(self, context, event=None) -> None: - """ - Callback for when the trigger fires - returns immediately. - Relies on trigger to throw an exception, otherwise it assumes execution was - successful. + """Callback for when the trigger fires. + + This returns immediately. It relies on trigger to throw an exception, + otherwise it assumes execution was successful. """ if event["status"] == "failed" or event["status"] == "error": self.log.exception("Unexpected error in the operation.") @@ -1809,9 +1803,9 @@ def execute_complete(self, context, event=None) -> None: class DataprocInstantiateInlineWorkflowTemplateOperator(GoogleCloudBaseOperator): - """ - Instantiate a WorkflowTemplate Inline on Google Cloud Dataproc. The operator will - wait until the WorkflowTemplate is finished executing. + """Instantiate a WorkflowTemplate Inline on Google Cloud Dataproc. + + The operator will wait until the WorkflowTemplate is finished executing. .. seealso:: For more information on how to use this operator, take a look at the guide: @@ -1921,10 +1915,10 @@ def execute(self, context: Context): ) def execute_complete(self, context, event=None) -> None: - """ - Callback for when the trigger fires - returns immediately. - Relies on trigger to throw an exception, otherwise it assumes execution was - successful. + """Callback for when the trigger fires. + + This returns immediately. It relies on trigger to throw an exception, + otherwise it assumes execution was successful. 
""" if event["status"] == "failed" or event["status"] == "error": self.log.exception("Unexpected error in the operation.") @@ -1934,8 +1928,7 @@ def execute_complete(self, context, event=None) -> None: class DataprocSubmitJobOperator(GoogleCloudBaseOperator): - """ - Submits a job to a cluster. + """Submit a job to a cluster. :param project_id: Optional. The ID of the Google Cloud project that the job belongs to. :param region: Required. The Cloud Dataproc region in which to handle the request. @@ -2063,10 +2056,10 @@ def execute(self, context: Context): return self.job_id def execute_complete(self, context, event=None) -> None: - """ - Callback for when the trigger fires - returns immediately. - Relies on trigger to throw an exception, otherwise it assumes execution was - successful. + """Callback for when the trigger fires. + + This returns immediately. It relies on trigger to throw an exception, + otherwise it assumes execution was successful. """ job_state = event["job_state"] job_id = event["job_id"] @@ -2083,8 +2076,7 @@ def on_kill(self): class DataprocUpdateClusterOperator(GoogleCloudBaseOperator): - """ - Updates a cluster in a project. + """Update a cluster in a project. :param region: Required. The Cloud Dataproc region in which to handle the request. :param project_id: Optional. The ID of the Google Cloud project the cluster belongs to. @@ -2222,8 +2214,7 @@ def execute_complete(self, context: Context, event: dict[str, Any]) -> Any: class DataprocCreateBatchOperator(GoogleCloudBaseOperator): - """ - Creates a batch workload. + """Create a batch workload. :param project_id: Optional. The ID of the Google Cloud project that the cluster belongs to. (templated) :param region: Required. The Cloud Dataproc region in which to handle the request. (templated) @@ -2391,10 +2382,10 @@ def execute(self, context: Context): return Batch.to_dict(result) def execute_complete(self, context, event=None) -> None: - """ - Callback for when the trigger fires - returns immediately. - Relies on trigger to throw an exception, otherwise it assumes execution was - successful. + """Callback for when the trigger fires. + + This returns immediately. It relies on trigger to throw an exception, + otherwise it assumes execution was successful. """ if event is None: raise AirflowException("Batch failed.") @@ -2413,8 +2404,7 @@ def on_kill(self): class DataprocDeleteBatchOperator(GoogleCloudBaseOperator): - """ - Deletes the batch workload resource. + """Delete the batch workload resource. :param batch_id: Required. The ID to use for the batch, which will become the final component of the batch's resource name. @@ -2477,8 +2467,7 @@ def execute(self, context: Context): class DataprocGetBatchOperator(GoogleCloudBaseOperator): - """ - Gets the batch workload resource representation. + """Get the batch workload resource representation. :param batch_id: Required. The ID to use for the batch, which will become the final component of the batch's resource name. @@ -2545,8 +2534,7 @@ def execute(self, context: Context): class DataprocListBatchesOperator(GoogleCloudBaseOperator): - """ - Lists batch workloads. + """List batch workloads. :param region: Required. The Cloud Dataproc region in which to handle the request. :param project_id: Optional. The ID of the Google Cloud project that the cluster belongs to. 
@@ -2568,7 +2556,6 @@ class DataprocListBatchesOperator(GoogleCloudBaseOperator): If set as a sequence, the identities from the list must grant Service Account Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account (templated). - """ template_fields: Sequence[str] = ("region", "project_id", "impersonation_chain") @@ -2615,8 +2602,7 @@ def execute(self, context: Context): class DataprocCancelOperationOperator(GoogleCloudBaseOperator): - """ - Cancel the batch workload resource. + """Cancel the batch workload resource. :param operation_name: Required. The name of the operation resource to be cancelled. :param region: Required. The Cloud Dataproc region in which to handle the request. diff --git a/airflow/providers/google/cloud/operators/dataproc_metastore.py b/airflow/providers/google/cloud/operators/dataproc_metastore.py index 3cdc6cf9c956..25ef15318f85 100644 --- a/airflow/providers/google/cloud/operators/dataproc_metastore.py +++ b/airflow/providers/google/cloud/operators/dataproc_metastore.py @@ -145,8 +145,7 @@ def get_link( class DataprocMetastoreCreateBackupOperator(GoogleCloudBaseOperator): - """ - Creates a new backup in a given project and location. + """Create a new backup in a given project and location. :param project_id: Required. The ID of the Google Cloud project that the service belongs to. :param region: Required. The ID of the Google Cloud region that the service belongs to. @@ -260,8 +259,7 @@ def execute(self, context: Context) -> dict: class DataprocMetastoreCreateMetadataImportOperator(GoogleCloudBaseOperator): - """ - Creates a new MetadataImport in a given project and location. + """Create a new MetadataImport in a given project and location. :param project_id: Required. The ID of the Google Cloud project that the service belongs to. :param region: Required. The ID of the Google Cloud region that the service belongs to. @@ -361,8 +359,7 @@ def execute(self, context: Context): class DataprocMetastoreCreateServiceOperator(GoogleCloudBaseOperator): - """ - Creates a metastore service in a project and location. + """Create a metastore service in a project and location. :param region: Required. The ID of the Google Cloud region that the service belongs to. :param project_id: Required. The ID of the Google Cloud project that the service belongs to. @@ -463,8 +460,7 @@ def execute(self, context: Context) -> dict: class DataprocMetastoreDeleteBackupOperator(GoogleCloudBaseOperator): - """ - Deletes a single backup. + """Delete a single backup. :param project_id: Required. The ID of the Google Cloud project that the backup belongs to. :param region: Required. The ID of the Google Cloud region that the backup belongs to. @@ -548,8 +544,7 @@ def execute(self, context: Context) -> None: class DataprocMetastoreDeleteServiceOperator(GoogleCloudBaseOperator): - """ - Deletes a single service. + """Delete a single service. :param request: The request object. Request message for [DataprocMetastore.DeleteService][google.cloud.metastore.v1.DataprocMetastore.DeleteService]. @@ -606,8 +601,7 @@ def execute(self, context: Context): class DataprocMetastoreExportMetadataOperator(GoogleCloudBaseOperator): - """ - Exports metadata from a service. + """Export metadata from a service. :param destination_gcs_folder: A Cloud Storage URI of a folder, in the format ``gs:///``. 
A sub-folder @@ -699,9 +693,10 @@ def _get_uri_from_destination(self, destination_uri: str): return destination_uri[5:] if destination_uri.startswith("gs://") else destination_uri def _wait_for_export_metadata(self, hook: DataprocMetastoreHook): - """ - Workaround to check that export was created successfully. - We discovered a issue to parse result to MetadataExport inside the SDK. + """Check that export was created successfully. + + This is a workaround to an issue parsing result to MetadataExport inside + the SDK. """ for time_to_wait in exponential_sleep_generator(initial=10, maximum=120): sleep(time_to_wait) @@ -724,8 +719,7 @@ def _wait_for_export_metadata(self, hook: DataprocMetastoreHook): class DataprocMetastoreGetServiceOperator(GoogleCloudBaseOperator): - """ - Gets the details of a single service. + """Get the details of a single service. :param region: Required. The ID of the Google Cloud region that the service belongs to. :param project_id: Required. The ID of the Google Cloud project that the service belongs to. @@ -797,8 +791,7 @@ def execute(self, context: Context) -> dict: class DataprocMetastoreListBackupsOperator(GoogleCloudBaseOperator): - """ - Lists backups in a service. + """List backups in a service. :param project_id: Required. The ID of the Google Cloud project that the backup belongs to. :param region: Required. The ID of the Google Cloud region that the backup belongs to. @@ -882,8 +875,7 @@ def execute(self, context: Context) -> list[dict]: class DataprocMetastoreRestoreServiceOperator(GoogleCloudBaseOperator): - """ - Restores a service from a backup. + """Restore a service from a backup. :param project_id: Required. The ID of the Google Cloud project that the service belongs to. :param region: Required. The ID of the Google Cloud region that the service belongs to. @@ -987,9 +979,10 @@ def execute(self, context: Context): DataprocMetastoreLink.persist(context=context, task_instance=self, url=METASTORE_SERVICE_LINK) def _wait_for_restore_service(self, hook: DataprocMetastoreHook): - """ - Workaround to check that restore service was finished successfully. - We discovered an issue to parse result to Restore inside the SDK. + """Check that the restore service finished successfully. + + This is a workaround to an issue parsing the result to Restore inside + the SDK. """ for time_to_wait in exponential_sleep_generator(initial=10, maximum=120): sleep(time_to_wait) @@ -1010,8 +1003,7 @@ def _wait_for_restore_service(self, hook: DataprocMetastoreHook): class DataprocMetastoreUpdateServiceOperator(GoogleCloudBaseOperator): - """ - Updates the parameters of a single service. + """Update the parameters of a single service. :param project_id: Required. The ID of the Google Cloud project that the service belongs to. :param region: Required. The ID of the Google Cloud region that the service belongs to. diff --git a/airflow/providers/google/cloud/operators/vision.py b/airflow/providers/google/cloud/operators/vision.py index 028ce8a0d730..f774b693cd6d 100644 --- a/airflow/providers/google/cloud/operators/vision.py +++ b/airflow/providers/google/cloud/operators/vision.py @@ -44,8 +44,7 @@ class CloudVisionCreateProductSetOperator(GoogleCloudBaseOperator): - """ - Creates a new ProductSet resource. + """Create a new ProductSet resource. ..
seealso:: For more information on how to use this operator, take a look at the guide: @@ -137,8 +136,7 @@ def execute(self, context: Context): class CloudVisionGetProductSetOperator(GoogleCloudBaseOperator): - """ - Gets information associated with a ProductSet. + """Get information associated with a ProductSet. .. seealso:: For more information on how to use this operator, take a look at the guide: @@ -215,19 +213,21 @@ def execute(self, context: Context): class CloudVisionUpdateProductSetOperator(GoogleCloudBaseOperator): - """ - Makes changes to a `ProductSet` resource. Only display_name can be updated currently. + """Make changes to a `ProductSet` resource. - .. note:: To locate the `ProductSet` resource, its `name` in the form + Only ``display_name`` can be updated currently. + + .. note:: To locate the ``ProductSet`` resource, its ``name`` in the form `projects/PROJECT_ID/locations/LOC_ID/productSets/PRODUCT_SET_ID` is necessary. - You can provide the `name` directly as an attribute of the `product_set` object. - However, you can leave it blank and provide `location` and `product_set_id` instead - (and optionally `project_id` - if not present, the connection default will be used) - and the `name` will be created by the operator itself. + You can provide the ``name` directly as an attribute of the ``product_set`` + object. You can also leave it blank, in which case ``name`` will be created + by the operator from ``location`` and ``product_set_id`` instead (and + optionally ``project_id``; if not present, the connection default will be + used). - This mechanism exists for your convenience, to allow leaving the `project_id` empty - and having Airflow use the connection default `project_id`. + This mechanism exists for your convenience, to allow leaving the + ``project_id`` empty and having Airflow use the connection default. .. seealso:: For more information on how to use this operator, take a look at the guide: @@ -319,17 +319,17 @@ def execute(self, context: Context): class CloudVisionDeleteProductSetOperator(GoogleCloudBaseOperator): - """ - Permanently deletes a `ProductSet`. `Products` and `ReferenceImages` in the - `ProductSet` are not deleted. The actual image files are not deleted from Google - Cloud Storage. + """Permanently deletes a ``ProductSet``. + + ``Products`` and ``ReferenceImages`` in the ``ProductSet`` are not deleted. + The actual image files are not deleted from Google Cloud Storage. .. seealso:: For more information on how to use this operator, take a look at the guide: :ref:`howto/operator:CloudVisionDeleteProductSetOperator` - :param location: (Required) The region where the ProductSet is located. Valid regions (as of 2019-02-05) - are: us-east1, us-west1, europe-west1, asia-east1 + :param location: (Required) The region where the ProductSet is located. + Valid regions (as of 2019-02-05) are: us-east1, us-west1, europe-west1, asia-east1 :param product_set_id: (Required) The resource id of this ProductSet. :param project_id: (Optional) The project in which the ProductSet should be created. If set to None or missing, the default project_id from the Google Cloud connection is used. @@ -399,14 +399,13 @@ def execute(self, context: Context): class CloudVisionCreateProductOperator(GoogleCloudBaseOperator): - """ - Creates and returns a new product resource. + """Create and return a new product resource. 
- Possible errors regarding the `Product` object provided: + Possible errors regarding the ``Product`` object provided: - - Returns `INVALID_ARGUMENT` if `display_name` is missing or longer than 4096 characters. - - Returns `INVALID_ARGUMENT` if `description` is longer than 4096 characters. - - Returns `INVALID_ARGUMENT` if `product_category` is missing or invalid. + - Returns ``INVALID_ARGUMENT`` if ``display_name`` is missing or longer than 4096 characters. + - Returns ``INVALID_ARGUMENT`` if ``description`` is longer than 4096 characters. + - Returns ``INVALID_ARGUMENT`` if ``product_category`` is missing or invalid. .. seealso:: For more information on how to use this operator, take a look at the guide: @@ -497,8 +496,7 @@ def execute(self, context: Context): class CloudVisionGetProductOperator(GoogleCloudBaseOperator): - """ - Gets information associated with a `Product`. + """Get information associated with a ``Product``. Possible errors: @@ -579,9 +577,9 @@ def execute(self, context: Context): class CloudVisionUpdateProductOperator(GoogleCloudBaseOperator): - """ - Makes changes to a Product resource. Only the display_name, description, and labels fields can be - updated right now. + """Make changes to a Product resource. + + Only the display_name, description, and labels fields can be updated right now. If labels are updated, the change will not be reflected in queries until the next index time. @@ -692,11 +690,11 @@ def execute(self, context: Context): class CloudVisionDeleteProductOperator(GoogleCloudBaseOperator): - """ - Permanently deletes a product and its reference images. + """Permanently delete a product and its reference images. - Metadata of the product and all its images will be deleted right away, but search queries against - ProductSets containing the product may still work until all related caches are refreshed. + Metadata of the product and all its images will be deleted right away, but + search queries against ProductSets containing the product may still work + until all related caches are refreshed. Possible errors: @@ -777,8 +775,7 @@ def execute(self, context: Context): class CloudVisionImageAnnotateOperator(GoogleCloudBaseOperator): - """ - Run image detection and annotation for an image or a batch of images. + """Run image detection and annotation for an image or a batch of images. .. seealso:: For more information on how to use this operator, take a look at the guide: @@ -845,8 +842,7 @@ def execute(self, context: Context): class CloudVisionCreateReferenceImageOperator(GoogleCloudBaseOperator): - """ - Creates and returns a new ReferenceImage ID resource. + """Create and return a new ReferenceImage ID resource. .. seealso:: For more information on how to use this operator, take a look at the guide: @@ -949,8 +945,7 @@ def execute(self, context: Context): class CloudVisionDeleteReferenceImageOperator(GoogleCloudBaseOperator): - """ - Deletes a ReferenceImage ID resource. + """Delete a ReferenceImage ID resource. .. seealso:: For more information on how to use this operator, take a look at the guide: @@ -1035,10 +1030,10 @@ def execute(self, context: Context): class CloudVisionAddProductToProductSetOperator(GoogleCloudBaseOperator): - """ - Adds a Product to the specified ProductSet. If the Product is already present, no change is made. + """Add a Product to the specified ProductSet. - One Product can be added to at most 100 ProductSets. + If the Product is already present, no change is made. One Product can be + added to at most 100 ProductSets. 
Possible errors: @@ -1124,8 +1119,7 @@ def execute(self, context: Context): class CloudVisionRemoveProductFromProductSetOperator(GoogleCloudBaseOperator): - """ - Removes a Product from the specified ProductSet. + """Remove a Product from the specified ProductSet. .. seealso:: For more information on how to use this operator, take a look at the guide: @@ -1207,8 +1201,7 @@ def execute(self, context: Context): class CloudVisionDetectTextOperator(GoogleCloudBaseOperator): - """ - Detects Text in the image. + """Detect Text in the image. .. seealso:: For more information on how to use this operator, take a look at the guide: @@ -1289,8 +1282,7 @@ def execute(self, context: Context): class CloudVisionTextDetectOperator(GoogleCloudBaseOperator): - """ - Detects Document Text in the image. + """Detect Document Text in the image. .. seealso:: For more information on how to use this operator, take a look at the guide: @@ -1370,8 +1362,7 @@ def execute(self, context: Context): class CloudVisionDetectImageLabelsOperator(GoogleCloudBaseOperator): - """ - Detects Document Text in the image. + """Detect Labels in the image. .. seealso:: For more information on how to use this operator, take a look at the guide: @@ -1441,8 +1432,7 @@ def execute(self, context: Context): class CloudVisionDetectImageSafeSearchOperator(GoogleCloudBaseOperator): - """ - Detects Document Text in the image. + """Detect Safe Search properties in the image. .. seealso:: For more information on how to use this operator, take a look at the guide: @@ -1514,9 +1504,10 @@ def execute(self, context: Context): def prepare_additional_parameters( additional_properties: dict | None, language_hints: Any, web_detection_params: Any ) -> dict | None: - """ - Creates additional_properties parameter based on language_hints, web_detection_params and - additional_properties parameters specified by the user. + """Create a value for the ``additional_properties`` parameter. + + The new value is based on ``language_hints``, ``web_detection_params``, and + ``additional_properties`` parameters specified by the user. """ if language_hints is None and web_detection_params is None: return additional_properties diff --git a/airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py b/airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py index 987be85152ce..a54ced136f3b 100644 --- a/airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py +++ b/airflow/providers/google/cloud/transfers/bigquery_to_bigquery.py @@ -54,10 +54,11 @@ class BigQueryToBigQueryOperator(BaseOperator): :param labels: a dictionary containing labels for the job/query, passed to BigQuery :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys). - **Example**: :: + + .. code-block:: python encryption_configuration = { - "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key" + "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key", } :param location: The geographic location of the job.
You must specify the location to run the job if the location to run a job is not in the US or the EU multi-regional location or diff --git a/airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py b/airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py index 0b96c0735b3c..30e7f61860b4 100644 --- a/airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py +++ b/airflow/providers/google/cloud/transfers/facebook_ads_to_gcs.py @@ -42,10 +42,10 @@ class FlushAction(Enum): class FacebookAdsReportToGcsOperator(BaseOperator): - """ - Fetches the results from the Facebook Ads API as desired in the params - Converts and saves the data as a temporary JSON file - Uploads the JSON to Google Cloud Storage. + """Fetch results from the Facebook Ads API. + + This converts and saves the data as a temporary JSON file, and uploads the + JSON to Google Cloud Storage. .. seealso:: For more information on the Facebook Ads API, take a look at the API docs: diff --git a/airflow/providers/google/cloud/transfers/gcs_to_bigquery.py b/airflow/providers/google/cloud/transfers/gcs_to_bigquery.py index 10eb04d0156f..25a600227938 100644 --- a/airflow/providers/google/cloud/transfers/gcs_to_bigquery.py +++ b/airflow/providers/google/cloud/transfers/gcs_to_bigquery.py @@ -148,10 +148,11 @@ class GCSToBigQueryOperator(BaseOperator): If autodetect is None and no schema is provided (neither via schema_fields nor a schema_object), assume the table already exists. :param encryption_configuration: [Optional] Custom encryption configuration (e.g., Cloud KMS keys). - **Example**: :: + + .. code-block:: python encryption_configuration = { - "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key" + "kmsKeyName": "projects/testp/locations/us/keyRings/test-kr/cryptoKeys/test-key", } :param location: [Optional] The geographic location of the job. Required except for US and EU. See details at https://cloud.google.com/bigquery/docs/locations#specifying_your_location diff --git a/airflow/providers/google/cloud/transfers/gcs_to_sftp.py b/airflow/providers/google/cloud/transfers/gcs_to_sftp.py index 0c7729e9d191..f4942311da9d 100644 --- a/airflow/providers/google/cloud/transfers/gcs_to_sftp.py +++ b/airflow/providers/google/cloud/transfers/gcs_to_sftp.py @@ -37,7 +37,7 @@ class GCSToSFTPOperator(BaseOperator): """ Transfer files from a Google Cloud Storage bucket to SFTP server. - **Example**: :: + .. code-block:: python with models.DAG( "example_gcs_to_sftp", diff --git a/airflow/providers/google/suite/transfers/local_to_drive.py b/airflow/providers/google/suite/transfers/local_to_drive.py index 490be7556120..cd09260ed84f 100644 --- a/airflow/providers/google/suite/transfers/local_to_drive.py +++ b/airflow/providers/google/suite/transfers/local_to_drive.py @@ -30,43 +30,45 @@ class LocalFilesystemToGoogleDriveOperator(BaseOperator): - """ - Upload a list of files to a Google Drive folder. + """Upload a list of files to a Google Drive folder. + This operator uploads a list of local files to a Google Drive folder. - The local files can be deleted after upload (optional). + The local files can optionally be deleted after upload. .. seealso:: For more information on how to use this operator, take a look at the guide: :ref:`howto/operator:LocalFilesystemToGoogleDriveOperator` :param local_paths: Python list of local file paths - :param drive_folder: path of the Drive folder, if folder_id param is given then drive_folder is a - sub path of folder_id.
- :param gcp_conn_id: Airflow Connection ID for GCP - :param delete: should the local files be deleted after upload? - :param ignore_if_missing: if True, then don't fail even if all files - can't be uploaded. + :param drive_folder: Path of the Drive folder; if *folder_id* is given, + *drive_folder* is a sub-path of that folder. + :param gcp_conn_id: Airflow Connection ID for GCP. + :param delete: Should the local files be deleted after upload? + :param ignore_if_missing: If *True*, don't fail even if some files can't be + uploaded. :param chunk_size: File will be uploaded in chunks of this many bytes. Only - used if resumable=True. Pass in a value of -1 if the file is to be - uploaded as a single chunk. Note that Google App Engine has a 5MB limit - on request size, so you should never set your chunk size larger than 5MB, - or to -1. + used when *resumable* is set to *True*. Pass in a value of -1 if the + file is to be uploaded as a single chunk. Note that Google App Engine + has a 5MB limit on request size, so you should never set your chunk size + larger than 5MB, or to -1. :param resumable: True if this is a resumable upload. False means upload in a single request. - :param delegate_to: The account to impersonate using domain-wide delegation of authority, - if any. For this to work, the service account making the request must have - domain-wide delegation enabled. - :param impersonation_chain: Optional service account to impersonate using short-term - credentials, or chained list of accounts required to get the access_token - of the last account in the list, which will be impersonated in the request. - If set as a string, the account must grant the originating account - the Service Account Token Creator IAM role. - If set as a sequence, the identities from the list must grant - Service Account Token Creator IAM role to the directly preceding identity, with first + :param delegate_to: The account to impersonate using domain-wide delegation + of authority, if any. For this to work, the service account making the + request must have domain-wide delegation enabled. + :param impersonation_chain: Optional service account to impersonate using + short-term credentials, or chained list of accounts required to get the + access token of the last account in the list, which will be impersonated + in the request. If set as a string, the account must grant the + originating account the Service Account Token Creator IAM role. If set + as a sequence, the identities from the list must grant Service Account + Token Creator IAM role to the directly preceding identity, with first account from the list granting this role to the originating account - :param folder_id: The base/root folder id for each local path in the Drive folder - :param show_full_target_path: If true then it reveals full available file path in the logs. - :return: Remote file ids after upload + :param folder_id: The base/root folder id for each local path in the Drive + folder. + :param show_full_target_path: If *True*, the full target file path is + shown in the logs. + :return: Remote file ids after upload.
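A minimal usage sketch; the task ID, local paths, folder name, and connection ID below are illustrative assumptions, not values taken from this module:

    .. code-block:: python

        upload_reports = LocalFilesystemToGoogleDriveOperator(
            task_id="upload_reports",
            local_paths=["/tmp/daily_report.csv"],
            drive_folder="reports/daily",
            gcp_conn_id="google_cloud_default",
            delete=False,
        )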
""" template_fields = ( diff --git a/tests/providers/google/cloud/hooks/test_bigquery.py b/tests/providers/google/cloud/hooks/test_bigquery.py index 5009647140f0..0711b1b550cc 100644 --- a/tests/providers/google/cloud/hooks/test_bigquery.py +++ b/tests/providers/google/cloud/hooks/test_bigquery.py @@ -2052,8 +2052,8 @@ class TestBigQueryBaseCursorMethodsDeprecationWarning: @mock.patch("airflow.providers.google.cloud.hooks.bigquery.BigQueryHook") def test_deprecation_warning(self, mock_bq_hook, func_name): args, kwargs = [1], {"param1": "val1"} - new_path = re.escape(f"`airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.{func_name}`") - message_pattern = rf"This method is deprecated\.\s+Please use {new_path}" + new_path = re.escape(f"airflow.providers.google.cloud.hooks.bigquery.BigQueryHook.{func_name}") + message_pattern = rf"This method is deprecated\.\s+Please use `{new_path}`" message_regex = re.compile(message_pattern, re.MULTILINE) mocked_func = getattr(mock_bq_hook, func_name) @@ -2064,7 +2064,8 @@ def test_deprecation_warning(self, mock_bq_hook, func_name): _ = func(*args, **kwargs) mocked_func.assert_called_once_with(*args, **kwargs) - assert re.search(f".*{new_path}.*", func.__doc__) + + assert re.search(f".*:func:`~{new_path}`.*", func.__doc__) class TestBigQueryWithLabelsAndDescription(_BigQueryBaseTestClass):